1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
15 // Copyright (C) 2014, Itseez Inc, all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 //   * Redistribution's of source code must retain the above copyright notice,
22 //     this list of conditions and the following disclaimer.
23 //
24 //   * Redistribution's in binary form must reproduce the above copyright notice,
25 //     this list of conditions and the following disclaimer in the documentation
26 //     and/or other materials provided with the distribution.
27 //
28 //   * The name of the copyright holders may not be used to endorse or promote products
29 //     derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43 
44 #ifndef OPENCV_ML_HPP
45 #define OPENCV_ML_HPP
46 
47 #ifdef __cplusplus
48 #  include "opencv2/core.hpp"
49 #endif
50 
51 #ifdef __cplusplus
52 
53 #include <float.h>
54 #include <map>
55 #include <iostream>
56 
57 /**
58   @defgroup ml Machine Learning
59 
60   The Machine Learning Library (MLL) is a set of classes and functions for statistical
61   classification, regression, and clustering of data.
62 
63   Most of the classification and regression algorithms are implemented as C++ classes. As the
64   algorithms have different sets of features (like an ability to handle missing measurements or
65   categorical input variables), there is a little common ground between the classes. This common
66   ground is defined by the class cv::ml::StatModel that all the other ML classes are derived from.
67 
68   See detailed overview here: @ref ml_intro.
69  */
70 
71 namespace cv
72 {
73 
74 namespace ml
75 {
76 
77 //! @addtogroup ml
78 //! @{
79 
80 /** @brief Variable types */
81 enum VariableTypes
82 {
83     VAR_NUMERICAL    =0, //!< same as VAR_ORDERED
84     VAR_ORDERED      =0, //!< ordered variables
85     VAR_CATEGORICAL  =1  //!< categorical variables
86 };
87 
88 /** @brief %Error types */
89 enum ErrorTypes
90 {
91     TEST_ERROR = 0,
92     TRAIN_ERROR = 1
93 };
94 
95 /** @brief Sample types */
96 enum SampleTypes
97 {
98     ROW_SAMPLE = 0, //!< each training sample is a row of samples
99     COL_SAMPLE = 1  //!< each training sample occupies a column of samples
100 };
101 
102 /** @brief The structure represents the logarithmic grid range of statmodel parameters.
103 
104 It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
105 being computed by cross-validation.
106  */
107 class CV_EXPORTS_W ParamGrid
108 {
109 public:
110     /** @brief Default constructor */
111     ParamGrid();
112     /** @brief Constructor with parameters */
113     ParamGrid(double _minVal, double _maxVal, double _logStep);
114 
115     CV_PROP_RW double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
116     CV_PROP_RW double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
117     /** @brief Logarithmic step for iterating the statmodel parameter.
118 
119     The grid determines the following iteration sequence of the statmodel parameter values:
120     \f[(minVal, minVal*step, minVal*{step}^2, \dots,  minVal*{logStep}^n),\f]
121     where \f$n\f$ is the maximal index satisfying
122     \f[\texttt{minVal} * \texttt{logStep} ^n <  \texttt{maxVal}\f]
123     The grid is logarithmic, so logStep must always be greater than 1. Default value is 1.
124     */
125     CV_PROP_RW double logStep;
126 
127     /** @brief Creates a ParamGrid Ptr that can be given to the %SVM::trainAuto method
128 
129     @param minVal minimum value of the parameter grid
130     @param maxVal maximum value of the parameter grid
131     @param logstep Logarithmic step for iterating the statmodel parameter
132     */
133     CV_WRAP static Ptr<ParamGrid> create(double minVal=0., double maxVal=0., double logstep=1.);
134 };
135 
136 /** @brief Class encapsulating training data.
137 
138 Please note that the class only specifies the interface of training data, but not implementation.
139 All the statistical model classes in _ml_ module accepts Ptr\<TrainData\> as parameter. In other
140 words, you can create your own class derived from TrainData and pass smart pointer to the instance
141 of this class into StatModel::train.
142 
143 @sa @ref ml_intro_data
144  */
145 class CV_EXPORTS_W TrainData
146 {
147 public:
missingValue()148     static inline float missingValue() { return FLT_MAX; }
149     virtual ~TrainData();
150 
151     CV_WRAP virtual int getLayout() const = 0;
152     CV_WRAP virtual int getNTrainSamples() const = 0;
153     CV_WRAP virtual int getNTestSamples() const = 0;
154     CV_WRAP virtual int getNSamples() const = 0;
155     CV_WRAP virtual int getNVars() const = 0;
156     CV_WRAP virtual int getNAllVars() const = 0;
157 
158     CV_WRAP virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
159     CV_WRAP virtual Mat getSamples() const = 0;
160     CV_WRAP virtual Mat getMissing() const = 0;
161 
162     /** @brief Returns matrix of train samples
163 
164     @param layout The requested layout. If it's different from the initial one, the matrix is
165         transposed. See ml::SampleTypes.
166     @param compressSamples if true, the function returns only the training samples (specified by
167         sampleIdx)
168     @param compressVars if true, the function returns the shorter training samples, containing only
169         the active variables.
170 
171     In current implementation the function tries to avoid physical data copying and returns the
172     matrix stored inside TrainData (unless the transposition or compression is needed).
173      */
174     CV_WRAP virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
175                                 bool compressSamples=true,
176                                 bool compressVars=true) const = 0;
177 
178     /** @brief Returns the vector of responses
179 
180     The function returns ordered or the original categorical responses. Usually it's used in
181     regression algorithms.
182      */
183     CV_WRAP virtual Mat getTrainResponses() const = 0;
184 
185     /** @brief Returns the vector of normalized categorical responses
186 
187     The function returns vector of responses. Each response is integer from `0` to `<number of
188     classes>-1`. The actual label value can be retrieved then from the class label vector, see
189     TrainData::getClassLabels.
190      */
191     CV_WRAP virtual Mat getTrainNormCatResponses() const = 0;
192     CV_WRAP virtual Mat getTestResponses() const = 0;
193     CV_WRAP virtual Mat getTestNormCatResponses() const = 0;
194     CV_WRAP virtual Mat getResponses() const = 0;
195     CV_WRAP virtual Mat getNormCatResponses() const = 0;
196     CV_WRAP virtual Mat getSampleWeights() const = 0;
197     CV_WRAP virtual Mat getTrainSampleWeights() const = 0;
198     CV_WRAP virtual Mat getTestSampleWeights() const = 0;
199     CV_WRAP virtual Mat getVarIdx() const = 0;
200     CV_WRAP virtual Mat getVarType() const = 0;
201     CV_WRAP virtual Mat getVarSymbolFlags() const = 0;
202     CV_WRAP virtual int getResponseType() const = 0;
203     CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
204     CV_WRAP virtual Mat getTestSampleIdx() const = 0;
205     CV_WRAP virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
206     virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
207     CV_WRAP virtual Mat getDefaultSubstValues() const = 0;
208 
209     CV_WRAP virtual int getCatCount(int vi) const = 0;
210 
211     /** @brief Returns the vector of class labels
212 
213     The function returns vector of unique labels occurred in the responses.
214      */
215     CV_WRAP virtual Mat getClassLabels() const = 0;
216 
217     CV_WRAP virtual Mat getCatOfs() const = 0;
218     CV_WRAP virtual Mat getCatMap() const = 0;
219 
220     /** @brief Splits the training data into the training and test parts
221     @sa TrainData::setTrainTestSplitRatio
222      */
223     CV_WRAP virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;
224 
225     /** @brief Splits the training data into the training and test parts
226 
227     The function selects a subset of specified relative size and then returns it as the training
228     set. If the function is not called, all the data is used for training. Please, note that for
229     each of TrainData::getTrain\* there is corresponding TrainData::getTest\*, so that the test
230     subset can be retrieved and processed as well.
231     @sa TrainData::setTrainTestSplit
232      */
233     CV_WRAP virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
234     CV_WRAP virtual void shuffleTrainTest() = 0;
235 
236     /** @brief Returns matrix of test samples */
237     CV_WRAP virtual Mat getTestSamples() const = 0;
238 
239     /** @brief Returns vector of symbolic names captured in loadFromCSV() */
240     CV_WRAP virtual void getNames(std::vector<String>& names) const = 0;
241 
242     /** @brief Extract from 1D vector elements specified by passed indexes.
243     @param vec input vector (supported types: CV_32S, CV_32F, CV_64F)
244     @param idx 1D index vector
245      */
246     static CV_WRAP Mat getSubVector(const Mat& vec, const Mat& idx);
247 
248     /** @brief Extract from matrix rows/cols specified by passed indexes.
249     @param matrix input matrix (supported types: CV_32S, CV_32F, CV_64F)
250     @param idx 1D index vector
251     @param layout specifies to extract rows (cv::ml::ROW_SAMPLES) or to extract columns (cv::ml::COL_SAMPLES)
252      */
253     static CV_WRAP Mat getSubMatrix(const Mat& matrix, const Mat& idx, int layout);
254 
255     /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.
256 
257     @param filename The input file name
258     @param headerLineCount The number of lines in the beginning to skip; besides the header, the
259         function also skips empty lines and lines staring with `#`
260     @param responseStartIdx Index of the first output variable. If -1, the function considers the
261         last variable as the response
262     @param responseEndIdx Index of the last output variable + 1. If -1, then there is single
263         response variable at responseStartIdx.
264     @param varTypeSpec The optional text string that specifies the variables' types. It has the
265         format `ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]`. That is, variables from `n1 to n2`
266         (inclusive range), `n3`, `n4 to n5` ... are considered ordered and `n6`, `n7 to n8` ... are
267         considered as categorical. The range `[n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8]`
268         should cover all the variables. If varTypeSpec is not specified, then algorithm uses the
269         following rules:
270         - all input variables are considered ordered by default. If some column contains has non-
271           numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding
272           variable is considered categorical.
273         - if there are several output variables, they are all considered as ordered. Error is
274           reported when non-numerical values are used.
275         - if there is a single output variable, then if its values are non-numerical or are all
276           integers, then it's considered categorical. Otherwise, it's considered ordered.
277     @param delimiter The character used to separate values in each line.
278     @param missch The character used to specify missing measurements. It should not be a digit.
279         Although it's a non-numerical value, it surely does not affect the decision of whether the
280         variable ordered or categorical.
281     @note If the dataset only contains input variables and no responses, use responseStartIdx = -2
282         and responseEndIdx = 0. The output variables vector will just contain zeros.
283      */
284     static Ptr<TrainData> loadFromCSV(const String& filename,
285                                       int headerLineCount,
286                                       int responseStartIdx=-1,
287                                       int responseEndIdx=-1,
288                                       const String& varTypeSpec=String(),
289                                       char delimiter=',',
290                                       char missch='?');
291 
292     /** @brief Creates training data from in-memory arrays.
293 
294     @param samples matrix of samples. It should have CV_32F type.
295     @param layout see ml::SampleTypes.
296     @param responses matrix of responses. If the responses are scalar, they should be stored as a
297         single row or as a single column. The matrix should have type CV_32F or CV_32S (in the
298         former case the responses are considered as ordered by default; in the latter case - as
299         categorical)
300     @param varIdx vector specifying which variables to use for training. It can be an integer vector
301         (CV_32S) containing 0-based variable indices or byte vector (CV_8U) containing a mask of
302         active variables.
303     @param sampleIdx vector specifying which samples to use for training. It can be an integer
304         vector (CV_32S) containing 0-based sample indices or byte vector (CV_8U) containing a mask
305         of training samples.
306     @param sampleWeights optional vector with weights for each sample. It should have CV_32F type.
307     @param varType optional vector of type CV_8U and size `<number_of_variables_in_samples> +
308         <number_of_variables_in_responses>`, containing types of each input and output variable. See
309         ml::VariableTypes.
310      */
311     CV_WRAP static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
312                                  InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
313                                  InputArray sampleWeights=noArray(), InputArray varType=noArray());
314 };
315 
316 /** @brief Base class for statistical models in OpenCV ML.
317  */
318 class CV_EXPORTS_W StatModel : public Algorithm
319 {
320 public:
321     /** Predict options */
322     enum Flags {
323         UPDATE_MODEL = 1,
324         RAW_OUTPUT=1, //!< makes the method return the raw results (the sum), not the class label
325         COMPRESSED_INPUT=2,
326         PREPROCESSED_INPUT=4
327     };
328 
329     /** @brief Returns the number of variables in training samples */
330     CV_WRAP virtual int getVarCount() const = 0;
331 
332     CV_WRAP virtual bool empty() const CV_OVERRIDE;
333 
334     /** @brief Returns true if the model is trained */
335     CV_WRAP virtual bool isTrained() const = 0;
336     /** @brief Returns true if the model is classifier */
337     CV_WRAP virtual bool isClassifier() const = 0;
338 
339     /** @brief Trains the statistical model
340 
341     @param trainData training data that can be loaded from file using TrainData::loadFromCSV or
342         created with TrainData::create.
343     @param flags optional flags, depending on the model. Some of the models can be updated with the
344         new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP).
345      */
346     CV_WRAP virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );
347 
348     /** @brief Trains the statistical model
349 
350     @param samples training samples
351     @param layout See ml::SampleTypes.
352     @param responses vector of responses associated with the training samples.
353     */
354     CV_WRAP virtual bool train( InputArray samples, int layout, InputArray responses );
355 
356     /** @brief Computes error on the training or test dataset
357 
358     @param data the training data
359     @param test if true, the error is computed over the test subset of the data, otherwise it's
360         computed over the training subset of the data. Please note that if you loaded a completely
361         different dataset to evaluate already trained classifier, you will probably want not to set
362         the test subset at all with TrainData::setTrainTestSplitRatio and specify test=false, so
363         that the error is computed for the whole new set. Yes, this sounds a bit confusing.
364     @param resp the optional output responses.
365 
366     The method uses StatModel::predict to compute the error. For regression models the error is
367     computed as RMS, for classifiers - as a percent of missclassified samples (0%-100%).
368      */
369     CV_WRAP virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;
370 
371     /** @brief Predicts response(s) for the provided sample(s)
372 
373     @param samples The input samples, floating-point matrix
374     @param results The optional output matrix of results.
375     @param flags The optional flags, model-dependent. See cv::ml::StatModel::Flags.
376      */
377     CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
378 
379     /** @brief Create and train model with default parameters
380 
381     The class must implement static `create()` method with no parameters or with all default parameter values
382     */
train(const Ptr<TrainData> & data,int flags=0)383     template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, int flags=0)
384     {
385         Ptr<_Tp> model = _Tp::create();
386         return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
387     }
388 };
389 
390 /****************************************************************************************\
391 *                                 Normal Bayes Classifier                                *
392 \****************************************************************************************/
393 
394 /** @brief Bayes classifier for normally distributed data.
395 
396 @sa @ref ml_intro_bayes
397  */
398 class CV_EXPORTS_W NormalBayesClassifier : public StatModel
399 {
400 public:
401     /** @brief Predicts the response for sample(s).
402 
403     The method estimates the most probable classes for input vectors. Input vectors (one or more)
404     are stored as rows of the matrix inputs. In case of multiple input vectors, there should be one
405     output vector outputs. The predicted class for a single input vector is returned by the method.
406     The vector outputProbs contains the output probabilities corresponding to each element of
407     result.
408      */
409     CV_WRAP virtual float predictProb( InputArray inputs, OutputArray outputs,
410                                OutputArray outputProbs, int flags=0 ) const = 0;
411 
412     /** Creates empty model
413     Use StatModel::train to train the model after creation. */
414     CV_WRAP static Ptr<NormalBayesClassifier> create();
415 
416     /** @brief Loads and creates a serialized NormalBayesClassifier from a file
417      *
418      * Use NormalBayesClassifier::save to serialize and store an NormalBayesClassifier to disk.
419      * Load the NormalBayesClassifier from this file again, by calling this function with the path to the file.
420      * Optionally specify the node for the file containing the classifier
421      *
422      * @param filepath path to serialized NormalBayesClassifier
423      * @param nodeName name of node containing the classifier
424      */
425     CV_WRAP static Ptr<NormalBayesClassifier> load(const String& filepath , const String& nodeName = String());
426 };
427 
428 /****************************************************************************************\
429 *                          K-Nearest Neighbour Classifier                                *
430 \****************************************************************************************/
431 
432 /** @brief The class implements K-Nearest Neighbors model
433 
434 @sa @ref ml_intro_knn
435  */
436 class CV_EXPORTS_W KNearest : public StatModel
437 {
438 public:
439 
440     /** Default number of neighbors to use in predict method. */
441     /** @see setDefaultK */
442     CV_WRAP virtual int getDefaultK() const = 0;
443     /** @copybrief getDefaultK @see getDefaultK */
444     CV_WRAP virtual void setDefaultK(int val) = 0;
445 
446     /** Whether classification or regression model should be trained. */
447     /** @see setIsClassifier */
448     CV_WRAP virtual bool getIsClassifier() const = 0;
449     /** @copybrief getIsClassifier @see getIsClassifier */
450     CV_WRAP virtual void setIsClassifier(bool val) = 0;
451 
452     /** Parameter for KDTree implementation. */
453     /** @see setEmax */
454     CV_WRAP virtual int getEmax() const = 0;
455     /** @copybrief getEmax @see getEmax */
456     CV_WRAP virtual void setEmax(int val) = 0;
457 
458     /** %Algorithm type, one of KNearest::Types. */
459     /** @see setAlgorithmType */
460     CV_WRAP virtual int getAlgorithmType() const = 0;
461     /** @copybrief getAlgorithmType @see getAlgorithmType */
462     CV_WRAP virtual void setAlgorithmType(int val) = 0;
463 
464     /** @brief Finds the neighbors and predicts responses for input vectors.
465 
466     @param samples Input samples stored by rows. It is a single-precision floating-point matrix of
467         `<number_of_samples> * k` size.
468     @param k Number of used nearest neighbors. Should be greater than 1.
469     @param results Vector with results of prediction (regression or classification) for each input
470         sample. It is a single-precision floating-point vector with `<number_of_samples>` elements.
471     @param neighborResponses Optional output values for corresponding neighbors. It is a single-
472         precision floating-point matrix of `<number_of_samples> * k` size.
473     @param dist Optional output distances from the input vectors to the corresponding neighbors. It
474         is a single-precision floating-point matrix of `<number_of_samples> * k` size.
475 
476     For each input vector (a row of the matrix samples), the method finds the k nearest neighbors.
477     In case of regression, the predicted result is a mean value of the particular vector's neighbor
478     responses. In case of classification, the class is determined by voting.
479 
480     For each input vector, the neighbors are sorted by their distances to the vector.
481 
482     In case of C++ interface you can use output pointers to empty matrices and the function will
483     allocate memory itself.
484 
485     If only a single input vector is passed, all output matrices are optional and the predicted
486     value is returned by the method.
487 
488     The function is parallelized with the TBB library.
489      */
490     CV_WRAP virtual float findNearest( InputArray samples, int k,
491                                OutputArray results,
492                                OutputArray neighborResponses=noArray(),
493                                OutputArray dist=noArray() ) const = 0;
494 
495     /** @brief Implementations of KNearest algorithm
496        */
497     enum Types
498     {
499         BRUTE_FORCE=1,
500         KDTREE=2
501     };
502 
503     /** @brief Creates the empty model
504 
505     The static method creates empty %KNearest classifier. It should be then trained using StatModel::train method.
506      */
507     CV_WRAP static Ptr<KNearest> create();
508     /** @brief Loads and creates a serialized knearest from a file
509      *
510      * Use KNearest::save to serialize and store an KNearest to disk.
511      * Load the KNearest from this file again, by calling this function with the path to the file.
512      *
513      * @param filepath path to serialized KNearest
514      */
515     CV_WRAP static Ptr<KNearest> load(const String& filepath);
516 };
517 
518 /****************************************************************************************\
519 *                                   Support Vector Machines                              *
520 \****************************************************************************************/
521 
522 /** @brief Support Vector Machines.
523 
524 @sa @ref ml_intro_svm
525  */
526 class CV_EXPORTS_W SVM : public StatModel
527 {
528 public:
529 
530     class CV_EXPORTS Kernel : public Algorithm
531     {
532     public:
533         virtual int getType() const = 0;
534         virtual void calc( int vcount, int n, const float* vecs, const float* another, float* results ) = 0;
535     };
536 
537     /** Type of a %SVM formulation.
538     See SVM::Types. Default value is SVM::C_SVC. */
539     /** @see setType */
540     CV_WRAP virtual int getType() const = 0;
541     /** @copybrief getType @see getType */
542     CV_WRAP virtual void setType(int val) = 0;
543 
544     /** Parameter \f$\gamma\f$ of a kernel function.
545     For SVM::POLY, SVM::RBF, SVM::SIGMOID or SVM::CHI2. Default value is 1. */
546     /** @see setGamma */
547     CV_WRAP virtual double getGamma() const = 0;
548     /** @copybrief getGamma @see getGamma */
549     CV_WRAP virtual void setGamma(double val) = 0;
550 
551     /** Parameter _coef0_ of a kernel function.
552     For SVM::POLY or SVM::SIGMOID. Default value is 0.*/
553     /** @see setCoef0 */
554     CV_WRAP virtual double getCoef0() const = 0;
555     /** @copybrief getCoef0 @see getCoef0 */
556     CV_WRAP virtual void setCoef0(double val) = 0;
557 
558     /** Parameter _degree_ of a kernel function.
559     For SVM::POLY. Default value is 0. */
560     /** @see setDegree */
561     CV_WRAP virtual double getDegree() const = 0;
562     /** @copybrief getDegree @see getDegree */
563     CV_WRAP virtual void setDegree(double val) = 0;
564 
565     /** Parameter _C_ of a %SVM optimization problem.
566     For SVM::C_SVC, SVM::EPS_SVR or SVM::NU_SVR. Default value is 0. */
567     /** @see setC */
568     CV_WRAP virtual double getC() const = 0;
569     /** @copybrief getC @see getC */
570     CV_WRAP virtual void setC(double val) = 0;
571 
572     /** Parameter \f$\nu\f$ of a %SVM optimization problem.
573     For SVM::NU_SVC, SVM::ONE_CLASS or SVM::NU_SVR. Default value is 0. */
574     /** @see setNu */
575     CV_WRAP virtual double getNu() const = 0;
576     /** @copybrief getNu @see getNu */
577     CV_WRAP virtual void setNu(double val) = 0;
578 
579     /** Parameter \f$\epsilon\f$ of a %SVM optimization problem.
580     For SVM::EPS_SVR. Default value is 0. */
581     /** @see setP */
582     CV_WRAP virtual double getP() const = 0;
583     /** @copybrief getP @see getP */
584     CV_WRAP virtual void setP(double val) = 0;
585 
586     /** Optional weights in the SVM::C_SVC problem, assigned to particular classes.
587     They are multiplied by _C_ so the parameter _C_ of class _i_ becomes `classWeights(i) * C`. Thus
588     these weights affect the misclassification penalty for different classes. The larger weight,
589     the larger penalty on misclassification of data from the corresponding class. Default value is
590     empty Mat. */
591     /** @see setClassWeights */
592     CV_WRAP virtual cv::Mat getClassWeights() const = 0;
593     /** @copybrief getClassWeights @see getClassWeights */
594     CV_WRAP virtual void setClassWeights(const cv::Mat &val) = 0;
595 
596     /** Termination criteria of the iterative %SVM training procedure which solves a partial
597     case of constrained quadratic optimization problem.
598     You can specify tolerance and/or the maximum number of iterations. Default value is
599     `TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, FLT_EPSILON )`; */
600     /** @see setTermCriteria */
601     CV_WRAP virtual cv::TermCriteria getTermCriteria() const = 0;
602     /** @copybrief getTermCriteria @see getTermCriteria */
603     CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
604 
605     /** Type of a %SVM kernel.
606     See SVM::KernelTypes. Default value is SVM::RBF. */
607     CV_WRAP virtual int getKernelType() const = 0;
608 
609     /** Initialize with one of predefined kernels.
610     See SVM::KernelTypes. */
611     CV_WRAP virtual void setKernel(int kernelType) = 0;
612 
613     /** Initialize with custom kernel.
614     See SVM::Kernel class for implementation details */
615     virtual void setCustomKernel(const Ptr<Kernel> &_kernel) = 0;
616 
617     //! %SVM type
618     enum Types {
619         /** C-Support Vector Classification. n-class classification (n \f$\geq\f$ 2), allows
620         imperfect separation of classes with penalty multiplier C for outliers. */
621         C_SVC=100,
622         /** \f$\nu\f$-Support Vector Classification. n-class classification with possible
623         imperfect separation. Parameter \f$\nu\f$ (in the range 0..1, the larger the value, the smoother
624         the decision boundary) is used instead of C. */
625         NU_SVC=101,
626         /** Distribution Estimation (One-class %SVM). All the training data are from
627         the same class, %SVM builds a boundary that separates the class from the rest of the feature
628         space. */
629         ONE_CLASS=102,
630         /** \f$\epsilon\f$-Support Vector Regression. The distance between feature vectors
631         from the training set and the fitting hyper-plane must be less than p. For outliers the
632         penalty multiplier C is used. */
633         EPS_SVR=103,
634         /** \f$\nu\f$-Support Vector Regression. \f$\nu\f$ is used instead of p.
635         See @cite LibSVM for details. */
636         NU_SVR=104
637     };
638 
639     /** @brief %SVM kernel type
640 
641     A comparison of different kernels on the following 2D test case with four classes. Four
642     SVM::C_SVC SVMs have been trained (one against rest) with auto_train. Evaluation on three
643     different kernels (SVM::CHI2, SVM::INTER, SVM::RBF). The color depicts the class with max score.
644     Bright means max-score \> 0, dark means max-score \< 0.
645     ![image](pics/SVM_Comparison.png)
646     */
647     enum KernelTypes {
648         /** Returned by SVM::getKernelType in case when custom kernel has been set */
649         CUSTOM=-1,
650         /** Linear kernel. No mapping is done, linear discrimination (or regression) is
651         done in the original feature space. It is the fastest option. \f$K(x_i, x_j) = x_i^T x_j\f$. */
652         LINEAR=0,
653         /** Polynomial kernel:
654         \f$K(x_i, x_j) = (\gamma x_i^T x_j + coef0)^{degree}, \gamma > 0\f$. */
655         POLY=1,
656         /** Radial basis function (RBF), a good choice in most cases.
657         \f$K(x_i, x_j) = e^{-\gamma ||x_i - x_j||^2}, \gamma > 0\f$. */
658         RBF=2,
659         /** Sigmoid kernel: \f$K(x_i, x_j) = \tanh(\gamma x_i^T x_j + coef0)\f$. */
660         SIGMOID=3,
661         /** Exponential Chi2 kernel, similar to the RBF kernel:
662         \f$K(x_i, x_j) = e^{-\gamma \chi^2(x_i,x_j)}, \chi^2(x_i,x_j) = (x_i-x_j)^2/(x_i+x_j), \gamma > 0\f$. */
663         CHI2=4,
664         /** Histogram intersection kernel. A fast kernel. \f$K(x_i, x_j) = min(x_i,x_j)\f$. */
665         INTER=5
666     };
667 
668     //! %SVM params type
669     enum ParamTypes {
670         C=0,
671         GAMMA=1,
672         P=2,
673         NU=3,
674         COEF=4,
675         DEGREE=5
676     };
677 
678     /** @brief Trains an %SVM with optimal parameters.
679 
680     @param data the training data that can be constructed using TrainData::create or
681         TrainData::loadFromCSV.
682     @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
683         subset is used to test the model, the others form the train set. So, the %SVM algorithm is
684         executed kFold times.
685     @param Cgrid grid for C
686     @param gammaGrid grid for gamma
687     @param pGrid grid for p
688     @param nuGrid grid for nu
689     @param coeffGrid grid for coeff
690     @param degreeGrid grid for degree
691     @param balanced If true and the problem is 2-class classification then the method creates more
692         balanced cross-validation subsets that is proportions between classes in subsets are close
693         to such proportion in the whole train dataset.
694 
695     The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
696     nu, coef0, degree. Parameters are considered optimal when the cross-validation
697     estimate of the test set error is minimal.
698 
699     If there is no need to optimize a parameter, the corresponding grid step should be set to any
700     value less than or equal to 1. For example, to avoid optimization in gamma, set `gammaGrid.step
701     = 0`, `gammaGrid.minVal`, `gamma_grid.maxVal` as arbitrary numbers. In this case, the value
702     `Gamma` is taken for gamma.
703 
704     And, finally, if the optimization in a parameter is required but the corresponding grid is
705     unknown, you may call the function SVM::getDefaultGrid. To generate a grid, for example, for
706     gamma, call `SVM::getDefaultGrid(SVM::GAMMA)`.
707 
708     This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
709     regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
710     the usual %SVM with parameters specified in params is executed.
711      */
712     virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
713                     ParamGrid Cgrid = getDefaultGrid(C),
714                     ParamGrid gammaGrid  = getDefaultGrid(GAMMA),
715                     ParamGrid pGrid      = getDefaultGrid(P),
716                     ParamGrid nuGrid     = getDefaultGrid(NU),
717                     ParamGrid coeffGrid  = getDefaultGrid(COEF),
718                     ParamGrid degreeGrid = getDefaultGrid(DEGREE),
719                     bool balanced=false) = 0;
720 
721     /** @brief Trains an %SVM with optimal parameters
722 
723     @param samples training samples
724     @param layout See ml::SampleTypes.
725     @param responses vector of responses associated with the training samples.
726     @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
727         subset is used to test the model, the others form the train set. So, the %SVM algorithm is
728     @param Cgrid grid for C
729     @param gammaGrid grid for gamma
730     @param pGrid grid for p
731     @param nuGrid grid for nu
732     @param coeffGrid grid for coeff
733     @param degreeGrid grid for degree
734     @param balanced If true and the problem is 2-class classification then the method creates more
735         balanced cross-validation subsets that is proportions between classes in subsets are close
736         to such proportion in the whole train dataset.
737 
738     The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
739     nu, coef0, degree. Parameters are considered optimal when the cross-validation
740     estimate of the test set error is minimal.
741 
742     This function only makes use of SVM::getDefaultGrid for parameter optimization and thus only
743     offers rudimentary parameter options.
744 
745     This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
746     regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
747     the usual %SVM with parameters specified in params is executed.
748     */
749     CV_WRAP virtual bool trainAuto(InputArray samples,
750             int layout,
751             InputArray responses,
752             int kFold = 10,
753             Ptr<ParamGrid> Cgrid = SVM::getDefaultGridPtr(SVM::C),
754             Ptr<ParamGrid> gammaGrid  = SVM::getDefaultGridPtr(SVM::GAMMA),
755             Ptr<ParamGrid> pGrid      = SVM::getDefaultGridPtr(SVM::P),
756             Ptr<ParamGrid> nuGrid     = SVM::getDefaultGridPtr(SVM::NU),
757             Ptr<ParamGrid> coeffGrid  = SVM::getDefaultGridPtr(SVM::COEF),
758             Ptr<ParamGrid> degreeGrid = SVM::getDefaultGridPtr(SVM::DEGREE),
759             bool balanced=false) = 0;
760 
761     /** @brief Retrieves all the support vectors
762 
763     The method returns all the support vectors as a floating-point matrix, where support vectors are
764     stored as matrix rows.
765      */
766     CV_WRAP virtual Mat getSupportVectors() const = 0;
767 
768     /** @brief Retrieves all the uncompressed support vectors of a linear %SVM
769 
770     The method returns all the uncompressed support vectors of a linear %SVM that the compressed
771     support vector, used for prediction, was derived from. They are returned in a floating-point
772     matrix, where the support vectors are stored as matrix rows.
773      */
774     CV_WRAP virtual Mat getUncompressedSupportVectors() const = 0;
775 
776     /** @brief Retrieves the decision function
777 
778     @param i the index of the decision function. If the problem solved is regression, 1-class or
779         2-class classification, then there will be just one decision function and the index should
780         always be 0. Otherwise, in the case of N-class classification, there will be \f$N(N-1)/2\f$
781         decision functions.
782     @param alpha the optional output vector for weights, corresponding to different support vectors.
783         In the case of linear %SVM all the alpha's will be 1's.
784     @param svidx the optional output vector of indices of support vectors within the matrix of
785         support vectors (which can be retrieved by SVM::getSupportVectors). In the case of linear
786         %SVM each decision function consists of a single "compressed" support vector.
787 
788     The method returns rho parameter of the decision function, a scalar subtracted from the weighted
789     sum of kernel responses.
790      */
791     CV_WRAP virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;
792 
793     /** @brief Generates a grid for %SVM parameters.
794 
795     @param param_id %SVM parameters IDs that must be one of the SVM::ParamTypes. The grid is
796     generated for the parameter with this ID.
797 
798     The function generates a grid for the specified parameter of the %SVM algorithm. The grid may be
799     passed to the function SVM::trainAuto.
800      */
801     static ParamGrid getDefaultGrid( int param_id );
802 
803     /** @brief Generates a grid for %SVM parameters.
804 
805     @param param_id %SVM parameters IDs that must be one of the SVM::ParamTypes. The grid is
806     generated for the parameter with this ID.
807 
808     The function generates a grid pointer for the specified parameter of the %SVM algorithm.
809     The grid may be passed to the function SVM::trainAuto.
810      */
811     CV_WRAP static Ptr<ParamGrid> getDefaultGridPtr( int param_id );
812 
813     /** Creates empty model.
814     Use StatModel::train to train the model. Since %SVM has several parameters, you may want to
815     find the best parameters for your problem, it can be done with SVM::trainAuto. */
816     CV_WRAP static Ptr<SVM> create();
817 
818     /** @brief Loads and creates a serialized svm from a file
819      *
820      * Use SVM::save to serialize and store an SVM to disk.
821      * Load the SVM from this file again, by calling this function with the path to the file.
822      *
823      * @param filepath path to serialized svm
824      */
825     CV_WRAP static Ptr<SVM> load(const String& filepath);
826 };
827 
828 /****************************************************************************************\
829 *                              Expectation - Maximization                                *
830 \****************************************************************************************/
831 
832 /** @brief The class implements the Expectation Maximization algorithm.
833 
834 @sa @ref ml_intro_em
835  */
836 class CV_EXPORTS_W EM : public StatModel
837 {
838 public:
839     //! Type of covariation matrices
840     enum Types {
841         /** A scaled identity matrix \f$\mu_k * I\f$. There is the only
842         parameter \f$\mu_k\f$ to be estimated for each matrix. The option may be used in special cases,
843         when the constraint is relevant, or as a first step in the optimization (for example in case
844         when the data is preprocessed with PCA). The results of such preliminary estimation may be
845         passed again to the optimization procedure, this time with
846         covMatType=EM::COV_MAT_DIAGONAL. */
847         COV_MAT_SPHERICAL=0,
848         /** A diagonal matrix with positive diagonal elements. The number of
849         free parameters is d for each matrix. This is most commonly used option yielding good
850         estimation results. */
851         COV_MAT_DIAGONAL=1,
852         /** A symmetric positively defined matrix. The number of free
853         parameters in each matrix is about \f$d^2/2\f$. It is not recommended to use this option, unless
854         there is pretty accurate initial estimation of the parameters and/or a huge number of
855         training samples. */
856         COV_MAT_GENERIC=2,
857         COV_MAT_DEFAULT=COV_MAT_DIAGONAL
858     };
859 
860     //! Default parameters
861     enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};
862 
863     //! The initial step
864     enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};
865 
866     /** The number of mixture components in the Gaussian mixture model.
867     Default value of the parameter is EM::DEFAULT_NCLUSTERS=5. Some of %EM implementation could
868     determine the optimal number of mixtures within a specified value range, but that is not the
869     case in ML yet. */
870     /** @see setClustersNumber */
871     CV_WRAP virtual int getClustersNumber() const = 0;
872     /** @copybrief getClustersNumber @see getClustersNumber */
873     CV_WRAP virtual void setClustersNumber(int val) = 0;
874 
875     /** Constraint on covariance matrices which defines type of matrices.
876     See EM::Types. */
877     /** @see setCovarianceMatrixType */
878     CV_WRAP virtual int getCovarianceMatrixType() const = 0;
879     /** @copybrief getCovarianceMatrixType @see getCovarianceMatrixType */
880     CV_WRAP virtual void setCovarianceMatrixType(int val) = 0;
881 
882     /** The termination criteria of the %EM algorithm.
883     The %EM algorithm can be terminated by the number of iterations termCrit.maxCount (number of
884     M-steps) or when relative change of likelihood logarithm is less than termCrit.epsilon. Default
885     maximum number of iterations is EM::DEFAULT_MAX_ITERS=100. */
886     /** @see setTermCriteria */
887     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
888     /** @copybrief getTermCriteria @see getTermCriteria */
889     CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
890 
891     /** @brief Returns weights of the mixtures
892 
893     Returns vector with the number of elements equal to the number of mixtures.
894      */
895     CV_WRAP virtual Mat getWeights() const = 0;
896     /** @brief Returns the cluster centers (means of the Gaussian mixture)
897 
898     Returns matrix with the number of rows equal to the number of mixtures and number of columns
899     equal to the space dimensionality.
900      */
901     CV_WRAP virtual Mat getMeans() const = 0;
902     /** @brief Returns covariation matrices
903 
904     Returns vector of covariation matrices. Number of matrices is the number of gaussian mixtures,
905     each matrix is a square floating-point matrix NxN, where N is the space dimensionality.
906      */
907     CV_WRAP virtual void getCovs(CV_OUT std::vector<Mat>& covs) const = 0;
908 
909     /** @brief Returns posterior probabilities for the provided samples
910 
911     @param samples The input samples, floating-point matrix
912     @param results The optional output \f$ nSamples \times nClusters\f$ matrix of results. It contains
913     posterior probabilities for each sample from the input
914     @param flags This parameter will be ignored
915      */
916     CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0;
917 
918     /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component
919     for the given sample.
920 
921     @param sample A sample for classification. It should be a one-channel matrix of
922         \f$1 \times dims\f$ or \f$dims \times 1\f$ size.
923     @param probs Optional output matrix that contains posterior probabilities of each component
924         given the sample. It has \f$1 \times nclusters\f$ size and CV_64FC1 type.
925 
926     The method returns a two-element double vector. Zero element is a likelihood logarithm value for
927     the sample. First element is an index of the most probable mixture component for the given
928     sample.
929      */
930     CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;
931 
932     /** @brief Estimate the Gaussian mixture parameters from a samples set.
933 
934     This variation starts with Expectation step. Initial values of the model parameters will be
935     estimated by the k-means algorithm.
936 
937     Unlike many of the ML models, %EM is an unsupervised learning algorithm and it does not take
938     responses (class labels or function values) as input. Instead, it computes the *Maximum
939     Likelihood Estimate* of the Gaussian mixture parameters from an input sample set, stores all the
940     parameters inside the structure: \f$p_{i,k}\f$ in probs, \f$a_k\f$ in means , \f$S_k\f$ in
941     covs[k], \f$\pi_k\f$ in weights , and optionally computes the output "class label" for each
942     sample: \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most
943     probable mixture component for each sample).
944 
945     The trained model can be used further for prediction, just like any other classifier. The
946     trained model is similar to the NormalBayesClassifier.
947 
948     @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
949         one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
950         it will be converted to the inner matrix of such type for the further computing.
951     @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
952         each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
953     @param labels The optional output "class label" for each sample:
954         \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
955         mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
956     @param probs The optional output matrix that contains posterior probabilities of each Gaussian
957         mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and
958         CV_64FC1 type.
959      */
960     CV_WRAP virtual bool trainEM(InputArray samples,
961                          OutputArray logLikelihoods=noArray(),
962                          OutputArray labels=noArray(),
963                          OutputArray probs=noArray()) = 0;
964 
965     /** @brief Estimate the Gaussian mixture parameters from a samples set.
966 
967     This variation starts with Expectation step. You need to provide initial means \f$a_k\f$ of
968     mixture components. Optionally you can pass initial weights \f$\pi_k\f$ and covariance matrices
969     \f$S_k\f$ of mixture components.
970 
971     @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
972         one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
973         it will be converted to the inner matrix of such type for the further computing.
974     @param means0 Initial means \f$a_k\f$ of mixture components. It is a one-channel matrix of
975         \f$nclusters \times dims\f$ size. If the matrix does not have CV_64F type it will be
976         converted to the inner matrix of such type for the further computing.
977     @param covs0 The vector of initial covariance matrices \f$S_k\f$ of mixture components. Each of
978         covariance matrices is a one-channel matrix of \f$dims \times dims\f$ size. If the matrices
979         do not have CV_64F type they will be converted to the inner matrices of such type for the
980         further computing.
981     @param weights0 Initial weights \f$\pi_k\f$ of mixture components. It should be a one-channel
982         floating-point matrix with \f$1 \times nclusters\f$ or \f$nclusters \times 1\f$ size.
983     @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
984         each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
985     @param labels The optional output "class label" for each sample:
986         \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
987         mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
988     @param probs The optional output matrix that contains posterior probabilities of each Gaussian
989         mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and
990         CV_64FC1 type.
991     */
992     CV_WRAP virtual bool trainE(InputArray samples, InputArray means0,
993                         InputArray covs0=noArray(),
994                         InputArray weights0=noArray(),
995                         OutputArray logLikelihoods=noArray(),
996                         OutputArray labels=noArray(),
997                         OutputArray probs=noArray()) = 0;
998 
999     /** @brief Estimate the Gaussian mixture parameters from a samples set.
1000 
1001     This variation starts with Maximization step. You need to provide initial probabilities
1002     \f$p_{i,k}\f$ to use this option.
1003 
1004     @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
1005         one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
1006         it will be converted to the inner matrix of such type for the further computing.
1007     @param probs0 the probabilities
1008     @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
1009         each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
1010     @param labels The optional output "class label" for each sample:
1011         \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
1012         mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
1013     @param probs The optional output matrix that contains posterior probabilities of each Gaussian
1014         mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and
1015         CV_64FC1 type.
1016     */
1017     CV_WRAP virtual bool trainM(InputArray samples, InputArray probs0,
1018                         OutputArray logLikelihoods=noArray(),
1019                         OutputArray labels=noArray(),
1020                         OutputArray probs=noArray()) = 0;
1021 
1022     /** Creates empty %EM model.
1023     The model should be trained then using StatModel::train(traindata, flags) method. Alternatively, you
1024     can use one of the EM::train\* methods or load it from file using Algorithm::load\<EM\>(filename).
1025      */
1026     CV_WRAP static Ptr<EM> create();
1027 
1028     /** @brief Loads and creates a serialized EM from a file
1029      *
1030      * Use EM::save to serialize and store an EM to disk.
1031      * Load the EM from this file again, by calling this function with the path to the file.
1032      * Optionally specify the node for the file containing the classifier
1033      *
1034      * @param filepath path to serialized EM
1035      * @param nodeName name of node containing the classifier
1036      */
1037     CV_WRAP static Ptr<EM> load(const String& filepath , const String& nodeName = String());
1038 };
1039 
1040 /****************************************************************************************\
1041 *                                      Decision Tree                                     *
1042 \****************************************************************************************/
1043 
1044 /** @brief The class represents a single decision tree or a collection of decision trees.
1045 
1046 The current public interface of the class allows user to train only a single decision tree, however
1047 the class is capable of storing multiple decision trees and using them for prediction (by summing
1048 responses or using a voting schemes), and the derived from DTrees classes (such as RTrees and Boost)
1049 use this capability to implement decision tree ensembles.
1050 
1051 @sa @ref ml_intro_trees
1052 */
1053 class CV_EXPORTS_W DTrees : public StatModel
1054 {
1055 public:
1056     /** Predict options */
1057     enum Flags { PREDICT_AUTO=0, PREDICT_SUM=(1<<8), PREDICT_MAX_VOTE=(2<<8), PREDICT_MASK=(3<<8) };
1058 
1059     /** Cluster possible values of a categorical variable into K\<=maxCategories clusters to
1060     find a suboptimal split.
1061     If a discrete variable, on which the training procedure tries to make a split, takes more than
1062     maxCategories values, the precise best subset estimation may take a very long time because the
1063     algorithm is exponential. Instead, many decision trees engines (including our implementation)
1064     try to find sub-optimal split in this case by clustering all the samples into maxCategories
1065     clusters that is some categories are merged together. The clustering is applied only in n \>
1066     2-class classification problems for categorical variables with N \> max_categories possible
1067     values. In case of regression and 2-class classification the optimal split can be found
1068     efficiently without employing clustering, thus the parameter is not used in these cases.
1069     Default value is 10.*/
1070     /** @see setMaxCategories */
1071     CV_WRAP virtual int getMaxCategories() const = 0;
1072     /** @copybrief getMaxCategories @see getMaxCategories */
1073     CV_WRAP virtual void setMaxCategories(int val) = 0;
1074 
1075     /** The maximum possible depth of the tree.
1076     That is the training algorithms attempts to split a node while its depth is less than maxDepth.
1077     The root node has zero depth. The actual depth may be smaller if the other termination criteria
1078     are met (see the outline of the training procedure @ref ml_intro_trees "here"), and/or if the
1079     tree is pruned. Default value is INT_MAX.*/
1080     /** @see setMaxDepth */
1081     CV_WRAP virtual int getMaxDepth() const = 0;
1082     /** @copybrief getMaxDepth @see getMaxDepth */
1083     CV_WRAP virtual void setMaxDepth(int val) = 0;
1084 
1085     /** If the number of samples in a node is less than this parameter then the node will not be split.
1086 
1087     Default value is 10.*/
1088     /** @see setMinSampleCount */
1089     CV_WRAP virtual int getMinSampleCount() const = 0;
1090     /** @copybrief getMinSampleCount @see getMinSampleCount */
1091     CV_WRAP virtual void setMinSampleCount(int val) = 0;
1092 
1093     /** If CVFolds \> 1 then algorithms prunes the built decision tree using K-fold
1094     cross-validation procedure where K is equal to CVFolds.
1095     Default value is 10.*/
1096     /** @see setCVFolds */
1097     CV_WRAP virtual int getCVFolds() const = 0;
1098     /** @copybrief getCVFolds @see getCVFolds */
1099     CV_WRAP virtual void setCVFolds(int val) = 0;
1100 
1101     /** If true then surrogate splits will be built.
1102     These splits allow to work with missing data and compute variable importance correctly.
1103     Default value is false.
1104     @note currently it's not implemented.*/
1105     /** @see setUseSurrogates */
1106     CV_WRAP virtual bool getUseSurrogates() const = 0;
1107     /** @copybrief getUseSurrogates @see getUseSurrogates */
1108     CV_WRAP virtual void setUseSurrogates(bool val) = 0;
1109 
1110     /** If true then a pruning will be harsher.
1111     This will make a tree more compact and more resistant to the training data noise but a bit less
1112     accurate. Default value is true.*/
1113     /** @see setUse1SERule */
1114     CV_WRAP virtual bool getUse1SERule() const = 0;
1115     /** @copybrief getUse1SERule @see getUse1SERule */
1116     CV_WRAP virtual void setUse1SERule(bool val) = 0;
1117 
1118     /** If true then pruned branches are physically removed from the tree.
1119     Otherwise they are retained and it is possible to get results from the original unpruned (or
1120     pruned less aggressively) tree. Default value is true.*/
1121     /** @see setTruncatePrunedTree */
1122     CV_WRAP virtual bool getTruncatePrunedTree() const = 0;
1123     /** @copybrief getTruncatePrunedTree @see getTruncatePrunedTree */
1124     CV_WRAP virtual void setTruncatePrunedTree(bool val) = 0;
1125 
1126     /** Termination criteria for regression trees.
1127     If all absolute differences between an estimated value in a node and values of train samples
1128     in this node are less than this parameter then the node will not be split further. Default
1129     value is 0.01f*/
1130     /** @see setRegressionAccuracy */
1131     CV_WRAP virtual float getRegressionAccuracy() const = 0;
1132     /** @copybrief getRegressionAccuracy @see getRegressionAccuracy */
1133     CV_WRAP virtual void setRegressionAccuracy(float val) = 0;
1134 
1135     /** @brief The array of a priori class probabilities, sorted by the class label value.
1136 
1137     The parameter can be used to tune the decision tree preferences toward a certain class. For
1138     example, if you want to detect some rare anomaly occurrence, the training base will likely
1139     contain much more normal cases than anomalies, so a very good classification performance
1140     will be achieved just by considering every case as normal. To avoid this, the priors can be
1141     specified, where the anomaly probability is artificially increased (up to 0.5 or even
1142     greater), so the weight of the misclassified anomalies becomes much bigger, and the tree is
1143     adjusted properly.
1144 
1145     You can also think about this parameter as weights of prediction categories which determine
1146     relative weights that you give to misclassification. That is, if the weight of the first
1147     category is 1 and the weight of the second category is 10, then each mistake in predicting
1148     the second category is equivalent to making 10 mistakes in predicting the first category.
1149     Default value is empty Mat.*/
1150     /** @see setPriors */
1151     CV_WRAP virtual cv::Mat getPriors() const = 0;
1152     /** @copybrief getPriors @see getPriors */
1153     CV_WRAP virtual void setPriors(const cv::Mat &val) = 0;
1154 
1155     /** @brief The class represents a decision tree node.
1156      */
1157     class CV_EXPORTS Node
1158     {
1159     public:
1160         Node();
1161         double value; //!< Value at the node: a class label in case of classification or estimated
1162                       //!< function value in case of regression.
1163         int classIdx; //!< Class index normalized to 0..class_count-1 range and assigned to the
1164                       //!< node. It is used internally in classification trees and tree ensembles.
1165         int parent; //!< Index of the parent node
1166         int left; //!< Index of the left child node
1167         int right; //!< Index of right child node
1168         int defaultDir; //!< Default direction where to go (-1: left or +1: right). It helps in the
1169                         //!< case of missing values.
1170         int split; //!< Index of the first split
1171     };
1172 
1173     /** @brief The class represents split in a decision tree.
1174      */
1175     class CV_EXPORTS Split
1176     {
1177     public:
1178         Split();
1179         int varIdx; //!< Index of variable on which the split is created.
1180         bool inversed; //!< If true, then the inverse split rule is used (i.e. left and right
1181                        //!< branches are exchanged in the rule expressions below).
1182         float quality; //!< The split quality, a positive number. It is used to choose the best split.
1183         int next; //!< Index of the next split in the list of splits for the node
1184         float c; /**< The threshold value in case of split on an ordered variable.
1185                       The rule is:
1186                       @code{.none}
1187                       if var_value < c
1188                         then next_node <- left
1189                         else next_node <- right
1190                       @endcode */
1191         int subsetOfs; /**< Offset of the bitset used by the split on a categorical variable.
1192                             The rule is:
1193                             @code{.none}
1194                             if bitset[var_value] == 1
1195                                 then next_node <- left
1196                                 else next_node <- right
1197                             @endcode */
1198     };
1199 
1200     /** @brief Returns indices of root nodes
1201     */
1202     virtual const std::vector<int>& getRoots() const = 0;
1203     /** @brief Returns all the nodes
1204 
1205     all the node indices are indices in the returned vector
1206      */
1207     virtual const std::vector<Node>& getNodes() const = 0;
1208     /** @brief Returns all the splits
1209 
1210     all the split indices are indices in the returned vector
1211      */
1212     virtual const std::vector<Split>& getSplits() const = 0;
1213     /** @brief Returns all the bitsets for categorical splits
1214 
1215     Split::subsetOfs is an offset in the returned vector
1216      */
1217     virtual const std::vector<int>& getSubsets() const = 0;
1218 
1219     /** @brief Creates the empty model
1220 
1221     The static method creates empty decision tree with the specified parameters. It should be then
1222     trained using train method (see StatModel::train). Alternatively, you can load the model from
1223     file using Algorithm::load\<DTrees\>(filename).
1224      */
1225     CV_WRAP static Ptr<DTrees> create();
1226 
1227     /** @brief Loads and creates a serialized DTrees from a file
1228      *
1229      * Use DTree::save to serialize and store an DTree to disk.
1230      * Load the DTree from this file again, by calling this function with the path to the file.
1231      * Optionally specify the node for the file containing the classifier
1232      *
1233      * @param filepath path to serialized DTree
1234      * @param nodeName name of node containing the classifier
1235      */
1236     CV_WRAP static Ptr<DTrees> load(const String& filepath , const String& nodeName = String());
1237 };
1238 
1239 /****************************************************************************************\
1240 *                                   Random Trees Classifier                              *
1241 \****************************************************************************************/
1242 
1243 /** @brief The class implements the random forest predictor.
1244 
1245 @sa @ref ml_intro_rtrees
1246  */
1247 class CV_EXPORTS_W RTrees : public DTrees
1248 {
1249 public:
1250 
1251     /** If true then variable importance will be calculated and then it can be retrieved by RTrees::getVarImportance.
1252     Default value is false.*/
1253     /** @see setCalculateVarImportance */
1254     CV_WRAP virtual bool getCalculateVarImportance() const = 0;
1255     /** @copybrief getCalculateVarImportance @see getCalculateVarImportance */
1256     CV_WRAP virtual void setCalculateVarImportance(bool val) = 0;
1257 
1258     /** The size of the randomly selected subset of features at each tree node and that are used
1259     to find the best split(s).
1260     If you set it to 0 then the size will be set to the square root of the total number of
1261     features. Default value is 0.*/
1262     /** @see setActiveVarCount */
1263     CV_WRAP virtual int getActiveVarCount() const = 0;
1264     /** @copybrief getActiveVarCount @see getActiveVarCount */
1265     CV_WRAP virtual void setActiveVarCount(int val) = 0;
1266 
1267     /** The termination criteria that specifies when the training algorithm stops.
1268     Either when the specified number of trees is trained and added to the ensemble or when
1269     sufficient accuracy (measured as OOB error) is achieved. Typically the more trees you have the
1270     better the accuracy. However, the improvement in accuracy generally diminishes and asymptotes
1271     pass a certain number of trees. Also to keep in mind, the number of tree increases the
1272     prediction time linearly. Default value is TermCriteria(TermCriteria::MAX_ITERS +
1273     TermCriteria::EPS, 50, 0.1)*/
1274     /** @see setTermCriteria */
1275     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1276     /** @copybrief getTermCriteria @see getTermCriteria */
1277     CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
1278 
1279     /** Returns the variable importance array.
1280     The method returns the variable importance vector, computed at the training stage when
1281     CalculateVarImportance is set to true. If this flag was set to false, the empty matrix is
1282     returned.
1283      */
1284     CV_WRAP virtual Mat getVarImportance() const = 0;
1285 
1286     /** Returns the result of each individual tree in the forest.
1287     In case the model is a regression problem, the method will return each of the trees'
1288     results for each of the sample cases. If the model is a classifier, it will return
1289     a Mat with samples + 1 rows, where the first row gives the class number and the
1290     following rows return the votes each class had for each sample.
1291         @param samples Array containing the samples for which votes will be calculated.
1292         @param results Array where the result of the calculation will be written.
1293         @param flags Flags for defining the type of RTrees.
1294     */
1295     CV_WRAP virtual void getVotes(InputArray samples, OutputArray results, int flags) const = 0;
1296 
1297     /** Returns the OOB error value, computed at the training stage when calcOOBError is set to true.
1298      * If this flag was set to false, 0 is returned. The OOB error is also scaled by sample weighting.
1299      */
1300 #if CV_VERSION_MAJOR == 4
getOOBError() const1301     CV_WRAP virtual double getOOBError() const { return 0; }
1302 #else
1303     /*CV_WRAP*/ virtual double getOOBError() const = 0;
1304 #endif
1305 
1306     /** Creates the empty model.
1307     Use StatModel::train to train the model, StatModel::train to create and train the model,
1308     Algorithm::load to load the pre-trained model.
1309      */
1310     CV_WRAP static Ptr<RTrees> create();
1311 
1312     /** @brief Loads and creates a serialized RTree from a file
1313      *
1314      * Use RTree::save to serialize and store an RTree to disk.
1315      * Load the RTree from this file again, by calling this function with the path to the file.
1316      * Optionally specify the node for the file containing the classifier
1317      *
1318      * @param filepath path to serialized RTree
1319      * @param nodeName name of node containing the classifier
1320      */
1321     CV_WRAP static Ptr<RTrees> load(const String& filepath , const String& nodeName = String());
1322 };
1323 
1324 /****************************************************************************************\
1325 *                                   Boosted tree classifier                              *
1326 \****************************************************************************************/
1327 
1328 /** @brief Boosted tree classifier derived from DTrees
1329 
1330 @sa @ref ml_intro_boost
1331  */
1332 class CV_EXPORTS_W Boost : public DTrees
1333 {
1334 public:
1335     /** Type of the boosting algorithm.
1336     See Boost::Types. Default value is Boost::REAL. */
1337     /** @see setBoostType */
1338     CV_WRAP virtual int getBoostType() const = 0;
1339     /** @copybrief getBoostType @see getBoostType */
1340     CV_WRAP virtual void setBoostType(int val) = 0;
1341 
1342     /** The number of weak classifiers.
1343     Default value is 100. */
1344     /** @see setWeakCount */
1345     CV_WRAP virtual int getWeakCount() const = 0;
1346     /** @copybrief getWeakCount @see getWeakCount */
1347     CV_WRAP virtual void setWeakCount(int val) = 0;
1348 
1349     /** A threshold between 0 and 1 used to save computational time.
1350     Samples with summary weight \f$\leq 1 - weight_trim_rate\f$ do not participate in the *next*
1351     iteration of training. Set this parameter to 0 to turn off this functionality. Default value is 0.95.*/
1352     /** @see setWeightTrimRate */
1353     CV_WRAP virtual double getWeightTrimRate() const = 0;
1354     /** @copybrief getWeightTrimRate @see getWeightTrimRate */
1355     CV_WRAP virtual void setWeightTrimRate(double val) = 0;
1356 
1357     /** Boosting type.
1358     Gentle AdaBoost and Real AdaBoost are often the preferable choices. */
1359     enum Types {
1360         DISCRETE=0, //!< Discrete AdaBoost.
1361         REAL=1, //!< Real AdaBoost. It is a technique that utilizes confidence-rated predictions
1362                 //!< and works well with categorical data.
1363         LOGIT=2, //!< LogitBoost. It can produce good regression fits.
1364         GENTLE=3 //!< Gentle AdaBoost. It puts less weight on outlier data points and for that
1365                  //!<reason is often good with regression data.
1366     };
1367 
1368     /** Creates the empty model.
1369     Use StatModel::train to train the model, Algorithm::load\<Boost\>(filename) to load the pre-trained model. */
1370     CV_WRAP static Ptr<Boost> create();
1371 
1372     /** @brief Loads and creates a serialized Boost from a file
1373      *
1374      * Use Boost::save to serialize and store an RTree to disk.
1375      * Load the Boost from this file again, by calling this function with the path to the file.
1376      * Optionally specify the node for the file containing the classifier
1377      *
1378      * @param filepath path to serialized Boost
1379      * @param nodeName name of node containing the classifier
1380      */
1381     CV_WRAP static Ptr<Boost> load(const String& filepath , const String& nodeName = String());
1382 };
1383 
1384 /****************************************************************************************\
1385 *                                   Gradient Boosted Trees                               *
1386 \****************************************************************************************/
1387 
1388 /*class CV_EXPORTS_W GBTrees : public DTrees
1389 {
1390 public:
1391     struct CV_EXPORTS_W_MAP Params : public DTrees::Params
1392     {
1393         CV_PROP_RW int weakCount;
1394         CV_PROP_RW int lossFunctionType;
1395         CV_PROP_RW float subsamplePortion;
1396         CV_PROP_RW float shrinkage;
1397 
1398         Params();
1399         Params( int lossFunctionType, int weakCount, float shrinkage,
1400                 float subsamplePortion, int maxDepth, bool useSurrogates );
1401     };
1402 
1403     enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
1404 
1405     virtual void setK(int k) = 0;
1406 
1407     virtual float predictSerial( InputArray samples,
1408                                  OutputArray weakResponses, int flags) const = 0;
1409 
1410     static Ptr<GBTrees> create(const Params& p);
1411 };*/
1412 
1413 /****************************************************************************************\
1414 *                              Artificial Neural Networks (ANN)                          *
1415 \****************************************************************************************/
1416 
1417 /////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////
1418 
1419 /** @brief Artificial Neural Networks - Multi-Layer Perceptrons.
1420 
1421 Unlike many other models in ML that are constructed and trained at once, in the MLP model these
1422 steps are separated. First, a network with the specified topology is created using the non-default
1423 constructor or the method ANN_MLP::create. All the weights are set to zeros. Then, the network is
1424 trained using a set of input and output vectors. The training procedure can be repeated more than
1425 once, that is, the weights can be adjusted based on the new training data.
1426 
1427 Additional flags for StatModel::train are available: ANN_MLP::TrainFlags.
1428 
1429 @sa @ref ml_intro_ann
1430  */
1431 class CV_EXPORTS_W ANN_MLP : public StatModel
1432 {
1433 public:
1434     /** Available training methods */
1435     enum TrainingMethods {
1436         BACKPROP=0, //!< The back-propagation algorithm.
1437         RPROP = 1, //!< The RPROP algorithm. See @cite RPROP93 for details.
1438         ANNEAL = 2 //!< The simulated annealing algorithm. See @cite Kirkpatrick83 for details.
1439     };
1440 
1441     /** Sets training method and common parameters.
1442     @param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
1443     @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP and to initialT for ANN_MLP::ANNEAL.
1444     @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP and to finalT for ANN_MLP::ANNEAL.
1445     */
1446     CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;
1447 
1448     /** Returns current training method */
1449     CV_WRAP virtual int getTrainMethod() const = 0;
1450 
1451     /** Initialize the activation function for each neuron.
1452     Currently the default and the only fully supported activation function is ANN_MLP::SIGMOID_SYM.
1453     @param type The type of activation function. See ANN_MLP::ActivationFunctions.
1454     @param param1 The first parameter of the activation function, \f$\alpha\f$. Default value is 0.
1455     @param param2 The second parameter of the activation function, \f$\beta\f$. Default value is 0.
1456     */
1457     CV_WRAP virtual void setActivationFunction(int type, double param1 = 0, double param2 = 0) = 0;
1458 
1459     /**  Integer vector specifying the number of neurons in each layer including the input and output layers.
1460     The very first element specifies the number of elements in the input layer.
1461     The last element - number of elements in the output layer. Default value is empty Mat.
1462     @sa getLayerSizes */
1463     CV_WRAP virtual void setLayerSizes(InputArray _layer_sizes) = 0;
1464 
1465     /**  Integer vector specifying the number of neurons in each layer including the input and output layers.
1466     The very first element specifies the number of elements in the input layer.
1467     The last element - number of elements in the output layer.
1468     @sa setLayerSizes */
1469     CV_WRAP virtual cv::Mat getLayerSizes() const = 0;
1470 
1471     /** Termination criteria of the training algorithm.
1472     You can specify the maximum number of iterations (maxCount) and/or how much the error could
1473     change between the iterations to make the algorithm continue (epsilon). Default value is
1474     TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01).*/
1475     /** @see setTermCriteria */
1476     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1477     /** @copybrief getTermCriteria @see getTermCriteria */
1478     CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
1479 
1480     /** BPROP: Strength of the weight gradient term.
1481     The recommended value is about 0.1. Default value is 0.1.*/
1482     /** @see setBackpropWeightScale */
1483     CV_WRAP virtual double getBackpropWeightScale() const = 0;
1484     /** @copybrief getBackpropWeightScale @see getBackpropWeightScale */
1485     CV_WRAP virtual void setBackpropWeightScale(double val) = 0;
1486 
1487     /** BPROP: Strength of the momentum term (the difference between weights on the 2 previous iterations).
1488     This parameter provides some inertia to smooth the random fluctuations of the weights. It can
1489     vary from 0 (the feature is disabled) to 1 and beyond. The value 0.1 or so is good enough.
1490     Default value is 0.1.*/
1491     /** @see setBackpropMomentumScale */
1492     CV_WRAP virtual double getBackpropMomentumScale() const = 0;
1493     /** @copybrief getBackpropMomentumScale @see getBackpropMomentumScale */
1494     CV_WRAP virtual void setBackpropMomentumScale(double val) = 0;
1495 
1496     /** RPROP: Initial value \f$\Delta_0\f$ of update-values \f$\Delta_{ij}\f$.
1497     Default value is 0.1.*/
1498     /** @see setRpropDW0 */
1499     CV_WRAP virtual double getRpropDW0() const = 0;
1500     /** @copybrief getRpropDW0 @see getRpropDW0 */
1501     CV_WRAP virtual void setRpropDW0(double val) = 0;
1502 
1503     /** RPROP: Increase factor \f$\eta^+\f$.
1504     It must be \>1. Default value is 1.2.*/
1505     /** @see setRpropDWPlus */
1506     CV_WRAP virtual double getRpropDWPlus() const = 0;
1507     /** @copybrief getRpropDWPlus @see getRpropDWPlus */
1508     CV_WRAP virtual void setRpropDWPlus(double val) = 0;
1509 
1510     /** RPROP: Decrease factor \f$\eta^-\f$.
1511     It must be \<1. Default value is 0.5.*/
1512     /** @see setRpropDWMinus */
1513     CV_WRAP virtual double getRpropDWMinus() const = 0;
1514     /** @copybrief getRpropDWMinus @see getRpropDWMinus */
1515     CV_WRAP virtual void setRpropDWMinus(double val) = 0;
1516 
1517     /** RPROP: Update-values lower limit \f$\Delta_{min}\f$.
1518     It must be positive. Default value is FLT_EPSILON.*/
1519     /** @see setRpropDWMin */
1520     CV_WRAP virtual double getRpropDWMin() const = 0;
1521     /** @copybrief getRpropDWMin @see getRpropDWMin */
1522     CV_WRAP virtual void setRpropDWMin(double val) = 0;
1523 
1524     /** RPROP: Update-values upper limit \f$\Delta_{max}\f$.
1525     It must be \>1. Default value is 50.*/
1526     /** @see setRpropDWMax */
1527     CV_WRAP virtual double getRpropDWMax() const = 0;
1528     /** @copybrief getRpropDWMax @see getRpropDWMax */
1529     CV_WRAP virtual void setRpropDWMax(double val) = 0;
1530 
1531     /** ANNEAL: Update initial temperature.
1532     It must be \>=0. Default value is 10.*/
1533     /** @see setAnnealInitialT */
1534     CV_WRAP virtual double getAnnealInitialT() const = 0;
1535     /** @copybrief getAnnealInitialT @see getAnnealInitialT */
1536     CV_WRAP virtual void setAnnealInitialT(double val) = 0;
1537 
1538     /** ANNEAL: Update final temperature.
1539     It must be \>=0 and less than initialT. Default value is 0.1.*/
1540     /** @see setAnnealFinalT */
1541     CV_WRAP virtual double getAnnealFinalT() const = 0;
1542     /** @copybrief getAnnealFinalT @see getAnnealFinalT */
1543     CV_WRAP virtual void setAnnealFinalT(double val) = 0;
1544 
1545     /** ANNEAL: Update cooling ratio.
1546     It must be \>0 and less than 1. Default value is 0.95.*/
1547     /** @see setAnnealCoolingRatio */
1548     CV_WRAP virtual double getAnnealCoolingRatio() const = 0;
1549     /** @copybrief getAnnealCoolingRatio @see getAnnealCoolingRatio */
1550     CV_WRAP virtual void setAnnealCoolingRatio(double val) = 0;
1551 
1552     /** ANNEAL: Update iteration per step.
1553     It must be \>0 . Default value is 10.*/
1554     /** @see setAnnealItePerStep */
1555     CV_WRAP virtual int getAnnealItePerStep() const = 0;
1556     /** @copybrief getAnnealItePerStep @see getAnnealItePerStep */
1557     CV_WRAP virtual void setAnnealItePerStep(int val) = 0;
1558 
1559     /** @brief Set/initialize anneal RNG */
1560     virtual void setAnnealEnergyRNG(const RNG& rng) = 0;
1561 
1562     /** possible activation functions */
1563     enum ActivationFunctions {
1564         /** Identity function: \f$f(x)=x\f$ */
1565         IDENTITY = 0,
1566         /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
1567         @note
1568         If you are using the default sigmoid activation function with the default parameter values
1569         fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
1570         will range from [-1.7159, 1.7159], instead of [0,1].*/
1571         SIGMOID_SYM = 1,
1572         /** Gaussian function: \f$f(x)=\beta e^{-\alpha x*x}\f$ */
1573         GAUSSIAN = 2,
1574         /** ReLU function: \f$f(x)=max(0,x)\f$ */
1575         RELU = 3,
1576         /** Leaky ReLU function: for x>0 \f$f(x)=x \f$ and x<=0 \f$f(x)=\alpha x \f$*/
1577         LEAKYRELU= 4
1578     };
1579 
1580     /** Train options */
1581     enum TrainFlags {
1582         /** Update the network weights, rather than compute them from scratch. In the latter case
1583         the weights are initialized using the Nguyen-Widrow algorithm. */
1584         UPDATE_WEIGHTS = 1,
1585         /** Do not normalize the input vectors. If this flag is not set, the training algorithm
1586         normalizes each input feature independently, shifting its mean value to 0 and making the
1587         standard deviation equal to 1. If the network is assumed to be updated frequently, the new
1588         training data could be much different from original one. In this case, you should take care
1589         of proper normalization. */
1590         NO_INPUT_SCALE = 2,
1591         /** Do not normalize the output vectors. If the flag is not set, the training algorithm
1592         normalizes each output feature independently, by transforming it to the certain range
1593         depending on the used activation function. */
1594         NO_OUTPUT_SCALE = 4
1595     };
1596 
1597     CV_WRAP virtual Mat getWeights(int layerIdx) const = 0;
1598 
1599     /** @brief Creates empty model
1600 
1601     Use StatModel::train to train the model, Algorithm::load\<ANN_MLP\>(filename) to load the pre-trained model.
1602     Note that the train method has optional flags: ANN_MLP::TrainFlags.
1603      */
1604     CV_WRAP static Ptr<ANN_MLP> create();
1605 
1606     /** @brief Loads and creates a serialized ANN from a file
1607      *
1608      * Use ANN::save to serialize and store an ANN to disk.
1609      * Load the ANN from this file again, by calling this function with the path to the file.
1610      *
1611      * @param filepath path to serialized ANN
1612      */
1613     CV_WRAP static Ptr<ANN_MLP> load(const String& filepath);
1614 
1615 };
1616 
1617 #ifndef DISABLE_OPENCV_3_COMPATIBILITY
1618 typedef ANN_MLP ANN_MLP_ANNEAL;
1619 #endif
1620 
1621 /****************************************************************************************\
1622 *                           Logistic Regression                                          *
1623 \****************************************************************************************/
1624 
1625 /** @brief Implements Logistic Regression classifier.
1626 
1627 @sa @ref ml_intro_lr
1628  */
1629 class CV_EXPORTS_W LogisticRegression : public StatModel
1630 {
1631 public:
1632 
1633     /** Learning rate. */
1634     /** @see setLearningRate */
1635     CV_WRAP virtual double getLearningRate() const = 0;
1636     /** @copybrief getLearningRate @see getLearningRate */
1637     CV_WRAP virtual void setLearningRate(double val) = 0;
1638 
1639     /** Number of iterations. */
1640     /** @see setIterations */
1641     CV_WRAP virtual int getIterations() const = 0;
1642     /** @copybrief getIterations @see getIterations */
1643     CV_WRAP virtual void setIterations(int val) = 0;
1644 
1645     /** Kind of regularization to be applied. See LogisticRegression::RegKinds. */
1646     /** @see setRegularization */
1647     CV_WRAP virtual int getRegularization() const = 0;
1648     /** @copybrief getRegularization @see getRegularization */
1649     CV_WRAP virtual void setRegularization(int val) = 0;
1650 
1651     /** Kind of training method used. See LogisticRegression::Methods. */
1652     /** @see setTrainMethod */
1653     CV_WRAP virtual int getTrainMethod() const = 0;
1654     /** @copybrief getTrainMethod @see getTrainMethod */
1655     CV_WRAP virtual void setTrainMethod(int val) = 0;
1656 
1657     /** Specifies the number of training samples taken in each step of Mini-Batch Gradient
1658     Descent. Will only be used if using LogisticRegression::MINI_BATCH training algorithm. It
1659     has to take values less than the total number of training samples. */
1660     /** @see setMiniBatchSize */
1661     CV_WRAP virtual int getMiniBatchSize() const = 0;
1662     /** @copybrief getMiniBatchSize @see getMiniBatchSize */
1663     CV_WRAP virtual void setMiniBatchSize(int val) = 0;
1664 
1665     /** Termination criteria of the algorithm. */
1666     /** @see setTermCriteria */
1667     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1668     /** @copybrief getTermCriteria @see getTermCriteria */
1669     CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
1670 
1671     //! Regularization kinds
1672     enum RegKinds {
1673         REG_DISABLE = -1, //!< Regularization disabled
1674         REG_L1 = 0, //!< %L1 norm
1675         REG_L2 = 1 //!< %L2 norm
1676     };
1677 
1678     //! Training methods
1679     enum Methods {
1680         BATCH = 0,
1681         MINI_BATCH = 1 //!< Set MiniBatchSize to a positive integer when using this method.
1682     };
1683 
1684     /** @brief Predicts responses for input samples and returns a float type.
1685 
1686     @param samples The input data for the prediction algorithm. Matrix [m x n], where each row
1687         contains variables (features) of one object being classified. Should have data type CV_32F.
1688     @param results Predicted labels as a column matrix of type CV_32S.
1689     @param flags Not used.
1690      */
1691     CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const CV_OVERRIDE = 0;
1692 
1693     /** @brief This function returns the trained parameters arranged across rows.
1694 
1695     For a two class classification problem, it returns a row matrix. It returns learnt parameters of
1696     the Logistic Regression as a matrix of type CV_32F.
1697      */
1698     CV_WRAP virtual Mat get_learnt_thetas() const = 0;
1699 
1700     /** @brief Creates empty model.
1701 
1702     Creates Logistic Regression model with parameters given.
1703      */
1704     CV_WRAP static Ptr<LogisticRegression> create();
1705 
1706     /** @brief Loads and creates a serialized LogisticRegression from a file
1707      *
1708      * Use LogisticRegression::save to serialize and store an LogisticRegression to disk.
1709      * Load the LogisticRegression from this file again, by calling this function with the path to the file.
1710      * Optionally specify the node for the file containing the classifier
1711      *
1712      * @param filepath path to serialized LogisticRegression
1713      * @param nodeName name of node containing the classifier
1714      */
1715     CV_WRAP static Ptr<LogisticRegression> load(const String& filepath , const String& nodeName = String());
1716 };
1717 
1718 
1719 /****************************************************************************************\
1720 *                        Stochastic Gradient Descent SVM Classifier                      *
1721 \****************************************************************************************/
1722 
1723 /*!
1724 @brief Stochastic Gradient Descent SVM classifier
1725 
1726 SVMSGD provides a fast and easy-to-use implementation of the SVM classifier using the Stochastic Gradient Descent approach,
1727 as presented in @cite bottou2010large.
1728 
1729 The classifier has following parameters:
1730 - model type,
1731 - margin type,
1732 - margin regularization (\f$\lambda\f$),
1733 - initial step size (\f$\gamma_0\f$),
1734 - step decreasing power (\f$c\f$),
1735 - and termination criteria.
1736 
1737 The model type may have one of the following values: \ref SGD and \ref ASGD.
1738 
1739 - \ref SGD is the classic version of SVMSGD classifier: every next step is calculated by the formula
1740   \f[w_{t+1} = w_t - \gamma(t) \frac{dQ_i}{dw} |_{w = w_t}\f]
1741   where
1742   - \f$w_t\f$ is the weights vector for decision function at step \f$t\f$,
1743   - \f$\gamma(t)\f$ is the step size of model parameters at the iteration \f$t\f$, it is decreased on each step by the formula
1744     \f$\gamma(t) = \gamma_0  (1 + \lambda  \gamma_0 t) ^ {-c}\f$
1745   - \f$Q_i\f$ is the target functional from SVM task for sample with number \f$i\f$, this sample is chosen stochastically on each step of the algorithm.
1746 
1747 - \ref ASGD is Average Stochastic Gradient Descent SVM Classifier. ASGD classifier averages weights vector on each step of algorithm by the formula
1748 \f$\widehat{w}_{t+1} = \frac{t}{1+t}\widehat{w}_{t} + \frac{1}{1+t}w_{t+1}\f$
1749 
1750 The recommended model type is ASGD (following @cite bottou2010large).
1751 
1752 The margin type may have one of the following values: \ref SOFT_MARGIN or \ref HARD_MARGIN.
1753 
1754 - You should use \ref HARD_MARGIN type, if you have linearly separable sets.
1755 - You should use \ref SOFT_MARGIN type, if you have non-linearly separable sets or sets with outliers.
1756 - In the general case (if you know nothing about linear separability of your sets), use SOFT_MARGIN.
1757 
1758 The other parameters may be described as follows:
1759 - Margin regularization parameter is responsible for weights decreasing at each step and for the strength of restrictions on outliers
1760   (the less the parameter, the less probability that an outlier will be ignored).
1761   Recommended value for SGD model is 0.0001, for ASGD model is 0.00001.
1762 
1763 - Initial step size parameter is the initial value for the step size \f$\gamma(t)\f$.
1764   You will have to find the best initial step for your problem.
1765 
1766 - Step decreasing power is the power parameter for \f$\gamma(t)\f$ decreasing by the formula, mentioned above.
1767   Recommended value for SGD model is 1, for ASGD model is 0.75.
1768 
1769 - Termination criteria can be TermCriteria::COUNT, TermCriteria::EPS or TermCriteria::COUNT + TermCriteria::EPS.
1770   You will have to find the best termination criteria for your problem.
1771 
1772 Note that the parameters margin regularization, initial step size, and step decreasing power should be positive.
1773 
1774 To use SVMSGD algorithm do as follows:
1775 
1776 - first, create the SVMSGD object. The algorithm will set optimal parameters by default, but you can set your own parameters via functions setSvmsgdType(),
1777   setMarginType(), setMarginRegularization(), setInitialStepSize(), and setStepDecreasingPower().
1778 
1779 - then the SVM model can be trained using the train features and the correspondent labels by the method train().
1780 
1781 - after that, the label of a new feature vector can be predicted using the method predict().
1782 
1783 @code
1784 // Create empty object
1785 cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
1786 
1787 // Train the Stochastic Gradient Descent SVM
1788 svmsgd->train(trainData);
1789 
1790 // Predict labels for the new samples
1791 svmsgd->predict(samples, responses);
1792 @endcode
1793 
1794 */
1795 
1796 class CV_EXPORTS_W SVMSGD : public cv::ml::StatModel
1797 {
1798 public:
1799 
1800     /** SVMSGD type.
1801     ASGD is often the preferable choice. */
1802     enum SvmsgdType
1803     {
1804         SGD, //!< Stochastic Gradient Descent
1805         ASGD //!< Average Stochastic Gradient Descent
1806     };
1807 
1808     /** Margin type.*/
1809     enum MarginType
1810     {
1811         SOFT_MARGIN, //!< General case, suits to the case of non-linearly separable sets, allows outliers.
1812         HARD_MARGIN  //!< More accurate for the case of linearly separable sets.
1813     };
1814 
1815     /**
1816      * @return the weights of the trained model (decision function f(x) = weights * x + shift).
1817     */
1818     CV_WRAP virtual Mat getWeights() = 0;
1819 
1820     /**
1821      * @return the shift of the trained model (decision function f(x) = weights * x + shift).
1822     */
1823     CV_WRAP virtual float getShift() = 0;
1824 
1825     /** @brief Creates empty model.
1826      * Use StatModel::train to train the model. Since %SVMSGD has several parameters, you may want to
1827      * find the best parameters for your problem or use setOptimalParameters() to set some default parameters.
1828     */
1829     CV_WRAP static Ptr<SVMSGD> create();
1830 
1831     /** @brief Loads and creates a serialized SVMSGD from a file
1832      *
1833      * Use SVMSGD::save to serialize and store an SVMSGD to disk.
1834      * Load the SVMSGD from this file again, by calling this function with the path to the file.
1835      * Optionally specify the node for the file containing the classifier
1836      *
1837      * @param filepath path to serialized SVMSGD
1838      * @param nodeName name of node containing the classifier
1839      */
1840     CV_WRAP static Ptr<SVMSGD> load(const String& filepath , const String& nodeName = String());
1841 
1842     /** @brief Function sets optimal parameters values for chosen SVM SGD model.
1843      * @param svmsgdType is the type of SVMSGD classifier.
1844      * @param marginType is the type of margin constraint.
1845     */
1846     CV_WRAP virtual void setOptimalParameters(int svmsgdType = SVMSGD::ASGD, int marginType = SVMSGD::SOFT_MARGIN) = 0;
1847 
1848     /** @brief %Algorithm type, one of SVMSGD::SvmsgdType. */
1849     /** @see setSvmsgdType */
1850     CV_WRAP virtual int getSvmsgdType() const = 0;
1851     /** @copybrief getSvmsgdType @see getSvmsgdType */
1852     CV_WRAP virtual void setSvmsgdType(int svmsgdType) = 0;
1853 
1854     /** @brief %Margin type, one of SVMSGD::MarginType. */
1855     /** @see setMarginType */
1856     CV_WRAP virtual int getMarginType() const = 0;
1857     /** @copybrief getMarginType @see getMarginType */
1858     CV_WRAP virtual void setMarginType(int marginType) = 0;
1859 
1860     /** @brief Parameter marginRegularization of a %SVMSGD optimization problem. */
1861     /** @see setMarginRegularization */
1862     CV_WRAP virtual float getMarginRegularization() const = 0;
1863     /** @copybrief getMarginRegularization @see getMarginRegularization */
1864     CV_WRAP virtual void setMarginRegularization(float marginRegularization) = 0;
1865 
1866     /** @brief Parameter initialStepSize of a %SVMSGD optimization problem. */
1867     /** @see setInitialStepSize */
1868     CV_WRAP virtual float getInitialStepSize() const = 0;
1869     /** @copybrief getInitialStepSize @see getInitialStepSize */
1870     CV_WRAP virtual void setInitialStepSize(float InitialStepSize) = 0;
1871 
1872     /** @brief Parameter stepDecreasingPower of a %SVMSGD optimization problem. */
1873     /** @see setStepDecreasingPower */
1874     CV_WRAP virtual float getStepDecreasingPower() const = 0;
1875     /** @copybrief getStepDecreasingPower @see getStepDecreasingPower */
1876     CV_WRAP virtual void setStepDecreasingPower(float stepDecreasingPower) = 0;
1877 
1878     /** @brief Termination criteria of the training algorithm.
1879     You can specify the maximum number of iterations (maxCount) and/or how much the error could
1880     change between the iterations to make the algorithm continue (epsilon).*/
1881     /** @see setTermCriteria */
1882     CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
1883     /** @copybrief getTermCriteria @see getTermCriteria */
1884     CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
1885 };
1886 
1887 
1888 /****************************************************************************************\
1889 *                           Auxiliary functions declarations                              *
1890 \****************************************************************************************/
1891 
1892 /** @brief Generates _sample_ from multivariate normal distribution
1893 
1894 @param mean an average row vector
1895 @param cov symmetric covariation matrix
1896 @param nsamples returned samples count
1897 @param samples returned samples array
1898 */
1899 CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, OutputArray samples);
1900 
1901 /** @brief Creates test set */
1902 CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
1903                                                 OutputArray samples, OutputArray responses);
1904 
1905 
1906 /****************************************************************************************\
1907 *                                   Simulated annealing solver                             *
1908 \****************************************************************************************/
1909 
1910 #ifdef CV_DOXYGEN
1911 /** @brief This class declares example interface for system state used in simulated annealing optimization algorithm.
1912 
1913 @note This class is not defined in C++ code and can't be use directly - you need your own implementation with the same methods.
1914 */
1915 struct SimulatedAnnealingSolverSystem
1916 {
1917     /** Give energy value for a state of system.*/
1918     double energy() const;
1919     /** Function which change the state of system (random perturbation).*/
1920     void changeState();
1921     /** Function to reverse to the previous state. Can be called once only after changeState(). */
1922     void reverseState();
1923 };
1924 #endif // CV_DOXYGEN
1925 
1926 /** @brief The class implements simulated annealing for optimization.
1927 
1928 @cite Kirkpatrick83 for details
1929 
1930 @param solverSystem optimization system (see SimulatedAnnealingSolverSystem)
1931 @param initialTemperature initial temperature
1932 @param finalTemperature final temperature
1933 @param coolingRatio temperature step multiplies
1934 @param iterationsPerStep number of iterations per temperature changing step
1935 @param lastTemperature optional output for last used temperature
1936 @param rngEnergy specify custom random numbers generator (cv::theRNG() by default)
1937 */
1938 template<class SimulatedAnnealingSolverSystem>
1939 int simulatedAnnealingSolver(SimulatedAnnealingSolverSystem& solverSystem,
1940      double initialTemperature, double finalTemperature, double coolingRatio,
1941      size_t iterationsPerStep,
1942      CV_OUT double* lastTemperature = NULL,
1943      cv::RNG& rngEnergy = cv::theRNG()
1944 );
1945 
1946 //! @} ml
1947 
1948 }
1949 }
1950 
1951 #include <opencv2/ml/ml.inl.hpp>
1952 
1953 #endif // __cplusplus
1954 #endif // OPENCV_ML_HPP
1955 
1956 /* End of file. */
1957