/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "opencv2/core/opencl/runtime/opencl_clblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "intel_gpu_gemm.inl.hpp"

#include "matmul.simd.hpp"
#include "matmul.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content

namespace cv
{

/****************************************************************************************\
*                                         GEMM                                           *
\****************************************************************************************/

#ifdef HAVE_CLAMDBLAS

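// GEMM via AMD clBLAS: D = alpha*op(A)*op(B) + beta*op(C). Returns false (so the
// caller can fall back to another implementation) when the inputs are unsuitable,
// e.g. when offsets/steps are not element-aligned or the UMats are not plain
// OpenCL buffers.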
static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
                      InputArray matC, double beta, OutputArray matD, int flags )
{
    int type = matA.type(), esz = CV_ELEM_SIZE(type);
    bool haveC = matC.kind() != cv::_InputArray::NONE;
    Size sizeA = matA.size(), sizeB = matB.size(), sizeC = haveC ? matC.size() : Size(0, 0);
    bool atrans = (flags & GEMM_1_T) != 0, btrans = (flags & GEMM_2_T) != 0, ctrans = (flags & GEMM_3_T) != 0;

    if (atrans)
        sizeA = Size(sizeA.height, sizeA.width);
    if (btrans)
        sizeB = Size(sizeB.height, sizeB.width);
    if (haveC && ctrans)
        sizeC = Size(sizeC.height, sizeC.width);

    Size sizeD(sizeB.width, sizeA.height);

    CV_Assert( matB.type() == type && (!haveC || matC.type() == type) );
    CV_Assert( sizeA.width == sizeB.height && (!haveC || sizeC == sizeD) );

    matD.create(sizeD, type);
    if ( matA.offset() % esz != 0 || matA.step() % esz != 0 ||
         matB.offset() % esz != 0 || matB.step() % esz != 0 ||
         (haveC && (matC.offset() % esz != 0 || matC.step() % esz != 0)) )
        return false;

    UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
    if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D))
    {
        return false;
    }
    if (haveC)
    {
        UMat C = matC.getUMat();
        if (!ocl::internal::isCLBuffer(C))
            return false;
    }
    if (haveC)
        ctrans ? transpose(matC, D) : matC.copyTo(D);
    else
        D.setTo(Scalar::all(0));

    int M = sizeD.height, N = sizeD.width, K = sizeA.width;
    int lda = (int)A.step / esz, ldb = (int)B.step / esz, ldc = (int)D.step / esz;
    int offa = (int)A.offset / esz, offb = (int)B.offset / esz, offc = (int)D.offset / esz;

    cl_command_queue clq = (cl_command_queue)ocl::Queue::getDefault().ptr();
    clblasTranspose transA = atrans ? clblasTrans : clblasNoTrans;
    clblasTranspose transB = btrans ? clblasTrans : clblasNoTrans;
    clblasOrder order = clblasRowMajor;
    clblasStatus status = clblasSuccess;

    if (type == CV_32FC1)
        status = clblasSgemm(order, transA, transB, M, N, K,
                             (cl_float)alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             (cl_float)beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    else if (type == CV_64FC1)
        status = clblasDgemm(order, transA, transB, M, N, K,
                             alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    else if (type == CV_32FC2)
    {
        cl_float2 alpha_2 = { { (cl_float)alpha, 0 } };
        cl_float2 beta_2  = { { (cl_float)beta, 0 } };
        status = clblasCgemm(order, transA, transB, M, N, K,
                             alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    }
    else if (type == CV_64FC2)
    {
        cl_double2 alpha_2 = { { alpha, 0 } };
        cl_double2 beta_2  = { { beta, 0 } };
        status = clblasZgemm(order, transA, transB, M, N, K,
                             alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    }
    else
        CV_Error(Error::StsUnsupportedFormat, "");

    return status == clblasSuccess;
}

#endif

#ifdef HAVE_OPENCL
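// Generic OpenCL GEMM. Two code paths: a portable "gemm" kernel from
// opencl_kernels_core, and, for single-channel float matrices on devices with
// Intel subgroup support, the specialized intel_gpu_gemm implementation.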
static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
                      InputArray matC, double beta, OutputArray matD, int flags )
{
    int depth = matA.depth(), cn = matA.channels();
    int type = CV_MAKETYPE(depth, cn);

    CV_Assert_N( type == matB.type(), (type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2) );

    const ocl::Device & dev = ocl::Device::getDefault();
    bool doubleSupport = dev.doubleFPConfig() > 0;

    if (!doubleSupport && depth == CV_64F)
        return false;

    bool haveC = matC.kind() != cv::_InputArray::NONE;
    Size sizeA = matA.size(), sizeB = matB.size(), sizeC = haveC ? matC.size() : Size(0, 0);
    bool atrans = (flags & GEMM_1_T) != 0, btrans = (flags & GEMM_2_T) != 0, ctrans = (flags & GEMM_3_T) != 0;

    CV_Assert( !haveC || matC.type() == type );

    Size sizeD(((btrans)? sizeB.height : sizeB.width),
               ((atrans)? sizeA.width : sizeA.height));
    matD.create(sizeD, type);

    UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();


    if (!dev.intelSubgroupsSupport() || (depth == CV_64F) || cn != 1)
    {
        String opts;

        if (atrans)
            sizeA = Size(sizeA.height, sizeA.width);
        if (btrans)
            sizeB = Size(sizeB.height, sizeB.width);
        if (haveC && ctrans)
            sizeC = Size(sizeC.height, sizeC.width);

        CV_Assert( sizeA.width == sizeB.height && (!haveC || sizeC == sizeD) );

        int max_wg_size = (int)dev.maxWorkGroupSize();
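        // Pick the largest square work-group tile (32, 16 or 8, falling back
        // to 1) whose cn-scaled footprint fits in the device work-group size.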
        int block_size = (max_wg_size / (32*cn) < 32) ? ((max_wg_size / (16*cn) < 16) ? ((max_wg_size / (8*cn) < 8) ? 1 : 8) : 16) : 32;

        if (atrans)
            A = A.t();

        if (btrans)
            B = B.t();

        if (haveC)
            ctrans ? transpose(matC, D) : matC.copyTo(D);

        int vectorWidths[] = { 4, 4, 2, 2, 1, 4, cn, -1 };
        int kercn = ocl::checkOptimalVectorWidth(vectorWidths, B, D);

        opts += format(" -D T=%s -D T1=%s -D WT=%s -D cn=%d -D kercn=%d -D LOCAL_SIZE=%d%s%s%s",
                          ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(CV_MAKETYPE(depth, kercn)),
                          cn, kercn, block_size,
                          (sizeA.width % block_size !=0) ? " -D NO_MULT" : "",
                          haveC ? " -D HAVE_C" : "",
                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");

        ocl::Kernel k("gemm", cv::ocl::core::gemm_oclsrc, opts);
        if (k.empty())
            return false;

        if (depth == CV_64F)
            k.args(ocl::KernelArg::ReadOnlyNoSize(A),
                   ocl::KernelArg::ReadOnlyNoSize(B, cn, kercn),
                   ocl::KernelArg::ReadWrite(D, cn, kercn),
                   sizeA.width, alpha, beta);
        else
            k.args(ocl::KernelArg::ReadOnlyNoSize(A),
                   ocl::KernelArg::ReadOnlyNoSize(B, cn, kercn),
                   ocl::KernelArg::ReadWrite(D, cn, kercn),
                   sizeA.width, (float)alpha, (float)beta);

        size_t globalsize[2] = { (size_t)sizeD.width * cn / kercn, (size_t)sizeD.height};
        size_t localsize[2] = { (size_t)block_size, (size_t)block_size};

        return k.run(2, globalsize, block_size!=1 ? localsize : NULL, false);
    }
    else
    {
        if (haveC && beta != 0.0)
        {
            ctrans ? transpose(matC, D) : matC.copyTo(D);
        }
        else
        {
            beta = 0.0;
        }

        return intel_gpu_gemm(A, sizeA,
                              B, sizeB,
                              D, sizeD,
                              alpha,
                              beta,
                              atrans, btrans);
    }
}
#endif


namespace hal {

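// Thin dispatch wrappers: each tries a custom HAL implementation first
// (CALL_HAL returns early on success), then falls through to the
// CPU-dispatched SIMD kernels declared in matmul.simd.hpp.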
void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
             float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
             int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm32f, cv_hal_gemm32f, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm32f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm32f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}

void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
             double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
             int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm64f, cv_hal_gemm64f, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm64f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm64f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}

void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
              float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
              int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm32fc, cv_hal_gemm32fc, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm32fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm32fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}

void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
              double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
              int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm64fc, cv_hal_gemm64fc, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm64fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm64fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}

} // namespace hal
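// cv::gemm computes D = alpha*op(A)*op(B) + beta*op(C), where each op() is an
// optional transposition selected by GEMM_1_T/GEMM_2_T/GEMM_3_T, e.g.:
//     cv::Mat A(3, 4, CV_32F), B(3, 4, CV_32F), C(3, 3, CV_32F), D;
//     cv::gemm(A, B, 1.0, C, 1.0, D, cv::GEMM_2_T);  // D = A*B' + C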
void gemm(InputArray matA, InputArray matB, double alpha,
          InputArray matC, double beta, OutputArray _matD, int flags)
{
#ifdef HAVE_CLAMDBLAS
    CV_OCL_RUN(ocl::haveAmdBlas() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2 && _matD.isUMat() &&
        matA.cols() > 20 && matA.rows() > 20 && matB.cols() > 20, // since it works incorrectly for small sizes
        ocl_gemm_amdblas(matA, matB, alpha, matC, beta, _matD, flags))
#endif

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_matD.isUMat() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2,
               ocl_gemm(matA, matB, alpha, matC, beta, _matD, flags))
#endif

    Mat A = matA.getMat(), B = matB.getMat(), C = beta != 0.0 ? matC.getMat() : Mat();
    Size a_size = A.size(), d_size;
    int len = 0, type = A.type();

    CV_Assert_N( type == B.type(), (type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2) );

    switch( flags & (GEMM_1_T|GEMM_2_T) )
    {
    case 0:
        d_size = Size( B.cols, a_size.height );
        len = B.rows;
        CV_Assert( a_size.width == len );
        break;
    case 1:
        d_size = Size( B.cols, a_size.width );
        len = B.rows;
        CV_Assert( a_size.height == len );
        break;
    case 2:
        d_size = Size( B.rows, a_size.height );
        len = B.cols;
        CV_Assert( a_size.width == len );
        break;
    case 3:
        d_size = Size( B.rows, a_size.width );
        len = B.cols;
        CV_Assert( a_size.height == len );
        break;
    }

    if( !C.empty() )
    {
        CV_Assert_N( C.type() == type,
            (((flags&GEMM_3_T) == 0 && C.rows == d_size.height && C.cols == d_size.width) ||
             ((flags&GEMM_3_T) != 0 && C.rows == d_size.width && C.cols == d_size.height)));
    }

    _matD.create( d_size.height, d_size.width, type );
    Mat D = _matD.getMat();
    if( (flags & GEMM_3_T) != 0 && C.data == D.data )
    {
        transpose( C, C );
        flags &= ~GEMM_3_T;
    }

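    // If D aliases A or B, compute into a temporary (DProxy) and copy the
    // result back at the end; the kernels cannot write over their own inputs.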
    Mat *DProxyPtr = &D, DProxy;
    if( D.data == A.data || D.data == B.data )
    {
        DProxy = Mat(d_size.height, d_size.width, D.type());
        DProxyPtr = &DProxy;
    }

    if( type == CV_32FC1 )
        hal::gemm32f(A.ptr<float>(), A.step, B.ptr<float>(), B.step, static_cast<float>(alpha),
                     C.ptr<float>(), C.step, static_cast<float>(beta),
                     DProxyPtr->ptr<float>(), DProxyPtr->step,
                     a_size.height, a_size.width, DProxyPtr->cols, flags);
    else if( type == CV_64FC1 )
        hal::gemm64f(A.ptr<double>(), A.step, B.ptr<double>(), B.step, alpha,
                     C.ptr<double>(), C.step, beta,
                     DProxyPtr->ptr<double>(), DProxyPtr->step,
                     a_size.height, a_size.width, DProxyPtr->cols, flags);
    else if( type == CV_32FC2 )
        hal::gemm32fc(A.ptr<float>(), A.step, B.ptr<float>(), B.step, static_cast<float>(alpha),
                      C.ptr<float>(), C.step, static_cast<float>(beta),
                      DProxyPtr->ptr<float>(), DProxyPtr->step,
                      a_size.height, a_size.width, DProxyPtr->cols, flags);
    else
    {
        CV_Assert( type == CV_64FC2 );
        hal::gemm64fc(A.ptr<double>(), A.step, B.ptr<double>(), B.step, alpha,
                      C.ptr<double>(), C.step, beta,
                      DProxyPtr->ptr<double>(), DProxyPtr->step, // write through the proxy, consistent with the other branches
                      a_size.height, a_size.width, DProxyPtr->cols, flags);
    }

    if(DProxyPtr != &D)
        DProxyPtr->copyTo(D);
}



/****************************************************************************************\
*                                        Transform                                       *
\****************************************************************************************/

static TransformFunc getTransformFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getTransformFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}

static TransformFunc getDiagTransformFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getDiagTransformFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}

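// cv::transform applies the dcn x scn matrix m (or dcn x (scn+1) for an affine
// transform with an implicit trailing 1) to every element of src:
// dst(i) = m * src(i), treating each scn-channel element as a column vector.
// E.g. a 1x3 m produces a single-channel weighted sum of the input channels.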
void transform(InputArray _src, OutputArray _dst, InputArray _mtx)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), m = _mtx.getMat();
    int depth = src.depth(), scn = src.channels(), dcn = m.rows;
    CV_Assert( scn == m.cols || scn + 1 == m.cols );
    bool isDiag = false;

    _dst.create( src.size(), CV_MAKETYPE(depth, dcn) );
    Mat dst = _dst.getMat();

    if (src.data == dst.data)  // inplace case
    {
        CV_Assert(scn == dcn);
        src = src.clone();  // TODO Add performance warning
    }

    int mtype = depth == CV_32S || depth == CV_64F ? CV_64F : CV_32F;
    AutoBuffer<double> _mbuf;
    double* mbuf;

    if( !m.isContinuous() || m.type() != mtype || m.cols != scn + 1 )
    {
        _mbuf.allocate(dcn*(scn+1));
        mbuf = _mbuf.data();
        Mat tmp(dcn, scn+1, mtype, mbuf);
        memset(tmp.ptr(), 0, tmp.total()*tmp.elemSize());
        if( m.cols == scn+1 )
            m.convertTo(tmp, mtype);
        else
        {
            Mat tmppart = tmp.colRange(0, m.cols);
            m.convertTo(tmppart, mtype);
        }
        m = tmp;
    }
    else
        mbuf = m.ptr<double>();

    if( scn == dcn )
    {
        int i, j;
        double eps = mtype == CV_32F ? FLT_EPSILON : DBL_EPSILON;

        if( scn == 1 )
        {
            double alpha, beta;
            if( mtype == CV_32F )
                alpha = m.at<float>(0), beta = m.at<float>(1);
            else
                alpha = m.at<double>(0), beta = m.at<double>(1);
            src.convertTo(dst, dst.type(), alpha, beta);
            return;
        }

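        // Detect a (near-)diagonal matrix so the faster per-channel
        // scale-and-shift kernel can be used instead of the full product.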
        for( i = 0, isDiag = true; isDiag && i < scn; i++ )
        {
            for( j = 0; isDiag && j < scn; j++ )
            {
                double v = mtype == CV_32F ? m.at<float>(i, j) : m.at<double>(i, j);
                if( i != j && fabs(v) > eps )
                    isDiag = false;
            }
        }
    }

    TransformFunc func = isDiag ? getDiagTransformFunc(depth) : getTransformFunc(depth);
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &dst, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t i, total = it.size;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], (uchar*)mbuf, (int)total, scn, dcn );
}



/****************************************************************************************\
*                                  Perspective Transform                                 *
\****************************************************************************************/

static TransformFunc getPerspectiveTransform(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getPerspectiveTransform, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}

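// cv::perspectiveTransform maps each scn-channel element x of src through the
// projective matrix m: w*(y, 1)^T = m*(x, 1)^T, i.e. the first dcn components
// of m*(x, 1)^T divided by the last one. Typical use: applying a 3x3
// homography to a vector of Point2f.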
void perspectiveTransform(InputArray _src, OutputArray _dst, InputArray _mtx)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), m = _mtx.getMat();
    int depth = src.depth(), scn = src.channels(), dcn = m.rows-1;
    CV_Assert( scn + 1 == m.cols );
    CV_Assert( depth == CV_32F || depth == CV_64F );

    _dst.create( src.size(), CV_MAKETYPE(depth, dcn) );
    Mat dst = _dst.getMat();

    const int mtype = CV_64F;
    AutoBuffer<double> _mbuf;
    double* mbuf = m.ptr<double>();

    if( !m.isContinuous() || m.type() != mtype )
    {
        _mbuf.allocate((dcn+1)*(scn+1));
        mbuf = _mbuf.data();
        Mat tmp(dcn+1, scn+1, mtype, mbuf);
        m.convertTo(tmp, mtype);
        m = tmp;
    }

    TransformFunc func = getPerspectiveTransform(depth);
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &dst, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t i, total = it.size;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], (uchar*)mbuf, (int)total, scn, dcn );
}

/****************************************************************************************\
*                                       ScaleAdd                                         *
\****************************************************************************************/

#ifdef HAVE_OPENCL

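// OpenCL scaleAdd: a single fused "KF" arithmetic kernel computes
// dst = src1*alpha + src2, promoting integer depths to float internally.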
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{
    const ocl::Device & d = ocl::Device::getDefault();

    bool doubleSupport = d.doubleFPConfig() > 0;
    Size size = _src1.size();
    int depth = CV_MAT_DEPTH(type);
    if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
        return false;

    _dst.create(size, type);
    int cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
    int kercn = ocl::predictOptimalVectorWidthMax(_src1, _src2, _dst),
        rowsPerWI = d.isIntel() ? 4 : 1;

    char cvt[2][50];
    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
                  format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D workT=%s -D convertToWT1=%s"
                         " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s"
                         " -D wdepth=%d%s -D rowsPerWI=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth,
                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                         ocl::convertTypeStr(wdepth, depth, kercn, cvt[1]),
                         ocl::typeToStr(wdepth), wdepth,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI));
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    if (wdepth == CV_32F)
        k.args(src1arg, src2arg, dstarg, (float)alpha);
    else
        k.args(src1arg, src2arg, dstarg, alpha);

    size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}

#endif

static ScaleAddFunc getScaleAddFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getScaleAddFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}

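// cv::scaleAdd computes dst = src1*alpha + src2 (the BLAS AXPY operation), e.g.:
//     cv::Mat x(1, 4, CV_32F), y(1, 4, CV_32F), dst;
//     cv::scaleAdd(x, 2.0, y, dst);  // dst = 2*x + y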
void scaleAdd(InputArray _src1, double alpha, InputArray _src2, OutputArray _dst)
{
    CV_INSTRUMENT_REGION();

    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    CV_Assert( type == _src2.type() );

    CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(),
            ocl_scaleAdd(_src1, alpha, _src2, _dst, type))

    if( depth < CV_32F )
    {
        addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
        return;
    }

    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    CV_Assert(src1.size == src2.size);

    _dst.create(src1.dims, src1.size, type);
    Mat dst = _dst.getMat();

    float falpha = (float)alpha;
    void* palpha = depth == CV_32F ? (void*)&falpha : (void*)&alpha;

    ScaleAddFunc func = getScaleAddFunc(depth);
    CV_Assert(func);

    if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous())
    {
        size_t len = src1.total()*cn;
        func(src1.ptr(), src2.ptr(), dst.ptr(), (int)len, palpha);
        return;
    }

    const Mat* arrays[] = {&src1, &src2, &dst, 0};
    uchar* ptrs[3] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t i, len = it.size*cn;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], ptrs[2], (int)len, palpha );
}

/****************************************************************************************\
*                                  Covariance Matrix                                     *
\****************************************************************************************/

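// Legacy interface: builds one nsamples x (width*height) matrix with each
// input image flattened into a row, then delegates to the InputArray overload.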
void calcCovarMatrix( const Mat* data, int nsamples, Mat& covar, Mat& _mean, int flags, int ctype )
{
    CV_INSTRUMENT_REGION();

    CV_Assert_N( data, nsamples > 0 );
    Size size = data[0].size();
    int sz = size.width * size.height, esz = (int)data[0].elemSize();
    int type = data[0].type();
    Mat mean;
    ctype = std::max(std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), _mean.depth()), CV_32F);

    if( (flags & CV_COVAR_USE_AVG) != 0 )
    {
        CV_Assert( _mean.size() == size );
        if( _mean.isContinuous() && _mean.type() == ctype )
            mean = _mean.reshape(1, 1);
        else
        {
            _mean.convertTo(mean, ctype);
            mean = mean.reshape(1, 1);
        }
    }

    Mat _data(nsamples, sz, type);

    for( int i = 0; i < nsamples; i++ )
    {
        CV_Assert_N( data[i].size() == size, data[i].type() == type );
        if( data[i].isContinuous() )
            memcpy( _data.ptr(i), data[i].ptr(), sz*esz );
        else
        {
            Mat dataRow(size.height, size.width, type, _data.ptr(i));
            data[i].copyTo(dataRow);
        }
    }

    calcCovarMatrix( _data, covar, mean, (flags & ~(CV_COVAR_ROWS|CV_COVAR_COLS)) | CV_COVAR_ROWS, ctype );
    if( (flags & CV_COVAR_USE_AVG) == 0 )
        _mean = mean.reshape(1, size.height);
}

void calcCovarMatrix( InputArray _src, OutputArray _covar, InputOutputArray _mean, int flags, int ctype )
{
    CV_INSTRUMENT_REGION();

    if(_src.kind() == _InputArray::STD_VECTOR_MAT || _src.kind() == _InputArray::STD_ARRAY_MAT)
    {
        std::vector<cv::Mat> src;
        _src.getMatVector(src);

        CV_Assert( src.size() > 0 );

        Size size = src[0].size();
        int type = src[0].type();

        ctype = std::max(std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), _mean.depth()), CV_32F);

        Mat _data(static_cast<int>(src.size()), size.area(), type);

        int i = 0;
        for(std::vector<cv::Mat>::iterator each = src.begin(); each != src.end(); ++each, ++i )
        {
            CV_Assert_N( (*each).size() == size, (*each).type() == type );
            Mat dataRow(size.height, size.width, type, _data.ptr(i));
            (*each).copyTo(dataRow);
        }

        Mat mean;
        if( (flags & CV_COVAR_USE_AVG) != 0 )
        {
            CV_Assert( _mean.size() == size );

            if( _mean.type() != ctype )  // compare the user-supplied mean, not the empty local one
            {
                mean = _mean.getMat();
                _mean.create(mean.size(), ctype);
                Mat tmp = _mean.getMat();
                mean.convertTo(tmp, ctype);
                mean = tmp;
            }

            mean = _mean.getMat().reshape(1, 1);
        }

        calcCovarMatrix( _data, _covar, mean, (flags & ~(CV_COVAR_ROWS|CV_COVAR_COLS)) | CV_COVAR_ROWS, ctype );

        if( (flags & CV_COVAR_USE_AVG) == 0 )
        {
            mean = mean.reshape(1, size.height);
            mean.copyTo(_mean);
        }
        return;
    }

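    // Single-matrix path: samples are the rows (CV_COVAR_ROWS) or columns
    // (CV_COVAR_COLS) of _src; the covariance is computed via mulTransposed.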
    Mat data = _src.getMat(), mean;
    CV_Assert( ((flags & CV_COVAR_ROWS) != 0) ^ ((flags & CV_COVAR_COLS) != 0) );
    bool takeRows = (flags & CV_COVAR_ROWS) != 0;
    int type = data.type();
    int nsamples = takeRows ? data.rows : data.cols;
    CV_Assert( nsamples > 0 );
    Size size = takeRows ? Size(data.cols, 1) : Size(1, data.rows);

    if( (flags & CV_COVAR_USE_AVG) != 0 )
    {
        mean = _mean.getMat();
        ctype = std::max(std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), mean.depth()), CV_32F);
        CV_Assert( mean.size() == size );
        if( mean.type() != ctype )
        {
            _mean.create(mean.size(), ctype);
            Mat tmp = _mean.getMat();
            mean.convertTo(tmp, ctype);
            mean = tmp;
        }
    }
    else
    {
        ctype = std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), CV_32F);
        reduce( _src, _mean, takeRows ? 0 : 1, CV_REDUCE_AVG, ctype );
        mean = _mean.getMat();
    }

    mulTransposed( data, _covar, ((flags & CV_COVAR_NORMAL) == 0) ^ takeRows,
        mean, (flags & CV_COVAR_SCALE) != 0 ? 1./nsamples : 1, ctype );
}



/****************************************************************************************\
*                                        Mahalanobis                                     *
\****************************************************************************************/

static MahalanobisImplFunc getMahalanobisImplFunc(int depth)
{
#ifdef CV_MAHALANOBIS_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(getMahalanobisImplFunc, (depth));
#else
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getMahalanobisImplFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}

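// Mahalanobis distance: sqrt( (v1 - v2)^T * icovar * (v1 - v2) ), where icovar
// is the inverse covariance matrix and the vectors are treated as flat arrays
// of len = width*height*channels elements.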
double Mahalanobis(InputArray _v1, InputArray _v2, InputArray _icovar)
{
    CV_INSTRUMENT_REGION();

    Mat v1 = _v1.getMat(), v2 = _v2.getMat(), icovar = _icovar.getMat();
    int type = v1.type(), depth = v1.depth();
    Size sz = v1.size();
    int len = sz.width*sz.height*v1.channels();
    AutoBuffer<double> buf(len);

    CV_Assert_N( type == v2.type(), type == icovar.type(),
        sz == v2.size(), len == icovar.rows && len == icovar.cols );

    sz.width *= v1.channels();
    if( v1.isContinuous() && v2.isContinuous() )
    {
        sz.width *= sz.height;
        sz.height = 1;
    }

    MahalanobisImplFunc func = getMahalanobisImplFunc(depth);
    CV_Assert(func);

    double result = func(v1, v2, icovar, buf.data(), len);
    return std::sqrt(result);
}



/****************************************************************************************\
*                                        MulTransposed                                   *
\****************************************************************************************/

static MulTransposedFunc getMulTransposedFunc(int stype, int dtype, bool ata)
{
#ifdef CV_MULTRANSPOSED_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(getMulTransposedFunc, (stype, dtype, ata));
#else
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getMulTransposedFunc, (stype, dtype, ata),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}

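// cv::mulTransposed computes dst = scale*(src - delta)^T*(src - delta) when
// ata is true, and dst = scale*(src - delta)*(src - delta)^T otherwise.
// Large inputs are routed through gemm(); smaller ones use dedicated kernels
// that exploit the symmetry of the result (completeSymm fills the other half).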
void mulTransposed(InputArray _src, OutputArray _dst, bool ata,
                   InputArray _delta, double scale, int dtype)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), delta = _delta.getMat();
    const int gemm_level = 100; // boundary above which GEMM is faster.
    int stype = src.type();
    dtype = std::max(std::max(CV_MAT_DEPTH(dtype >= 0 ? dtype : stype), delta.depth()), CV_32F);
    CV_Assert( src.channels() == 1 );

    if( !delta.empty() )
    {
        CV_Assert_N( delta.channels() == 1,
            (delta.rows == src.rows || delta.rows == 1),
            (delta.cols == src.cols || delta.cols == 1));
        if( delta.type() != dtype )
            delta.convertTo(delta, dtype);
    }

    int dsize = ata ? src.cols : src.rows;
    _dst.create( dsize, dsize, dtype );
    Mat dst = _dst.getMat();

    if( src.data == dst.data || (stype == dtype &&
        (dst.cols >= gemm_level && dst.rows >= gemm_level &&
         src.cols >= gemm_level && src.rows >= gemm_level)))
    {
        Mat src2;
        const Mat* tsrc = &src;
        if( !delta.empty() )
        {
            if( delta.size() == src.size() )
                subtract( src, delta, src2 );
            else
            {
                repeat(delta, src.rows/delta.rows, src.cols/delta.cols, src2);
                subtract( src, src2, src2 );
            }
            tsrc = &src2;
        }
        gemm( *tsrc, *tsrc, scale, Mat(), 0, dst, ata ? GEMM_1_T : GEMM_2_T );
    }
    else
    {
        MulTransposedFunc func = getMulTransposedFunc(stype, dtype, ata);
        if( !func )
            CV_Error( CV_StsUnsupportedFormat, "" );

        func( src, dst, delta, scale );
        completeSymm( dst, false );
    }
}

/****************************************************************************************\
*                                      Dot Product                                       *
\****************************************************************************************/

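// Per-depth dot-product kernels; each forwards to the CPU-dispatched SIMD
// implementation selected at build time.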
static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_8u, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_8s(const schar* src1, const schar* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_8s, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_16u, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_16s(const short* src1, const short* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_16s, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_32s(const int* src1, const int* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_32s, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_32f(const float* src1, const float* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_32f, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_64f(const double* src1, const double* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_64f, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}

typedef double (*DotProdFunc)(const uchar* src1, const uchar* src2, int len);

static DotProdFunc getDotProdFunc(int depth)
{
    static DotProdFunc dotProdTab[] =
    {
        (DotProdFunc)GET_OPTIMIZED(dotProd_8u), (DotProdFunc)GET_OPTIMIZED(dotProd_8s),
        (DotProdFunc)dotProd_16u, (DotProdFunc)dotProd_16s,
        (DotProdFunc)dotProd_32s, (DotProdFunc)GET_OPTIMIZED(dotProd_32f),
        (DotProdFunc)dotProd_64f, 0
    };

    return dotProdTab[depth];
}

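// Mat::dot treats both matrices as flat vectors (channels included) and
// returns the scalar product, e.g.:
//     cv::Mat a = (cv::Mat_<float>(1, 3) << 1, 2, 3);
//     double d = a.dot(a);  // 14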
double Mat::dot(InputArray _mat) const
{
    CV_INSTRUMENT_REGION();

    Mat mat = _mat.getMat();
    int cn = channels();
    DotProdFunc func = getDotProdFunc(depth());
    CV_Assert_N( mat.type() == type(), mat.size == size, func != 0 );

    if( isContinuous() && mat.isContinuous() )
    {
        size_t len = total()*cn;
        if( len == (size_t)(int)len )
            return func(data, mat.data, (int)len);
    }

    const Mat* arrays[] = {this, &mat, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    int len = (int)(it.size*cn);
    double r = 0;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        r += func( ptrs[0], ptrs[1], len );

    return r;
}


#ifdef HAVE_OPENCL

static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    int type = src1.type(), depth = CV_MAT_DEPTH(type),
            kercn = ocl::predictOptimalVectorWidth(src1, src2);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !doubleSupport && depth == CV_64F )
        return false;

    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
    int ddepth = std::max(CV_32F, depth);

    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    size_t globalsize = dbsize * wgs;
    if (k.run(1, &globalsize, &wgs, true))
    {
        res = sum(db.getMat(ACCESS_READ))[0];
        return true;
    }
    return false;
}

#endif

double UMat::dot(InputArray m) const
{
    CV_INSTRUMENT_REGION();

    CV_Assert(m.sameSize(*this) && m.type() == type());

#ifdef HAVE_OPENCL
    double r = 0;
    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
#endif

    return getMat(ACCESS_READ).dot(m);
}

}  // namespace cv


#ifndef OPENCV_EXCLUDE_C_API
/****************************************************************************************\
*                                    Earlier API                                         *
\****************************************************************************************/

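// Thin wrappers that adapt the legacy CvArr*-based C API to the C++
// implementations above, converting arguments with cv::cvarrToMat.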
CV_IMPL void cvGEMM( const CvArr* Aarr, const CvArr* Barr, double alpha,
                     const CvArr* Carr, double beta, CvArr* Darr, int flags )
{
    cv::Mat A = cv::cvarrToMat(Aarr), B = cv::cvarrToMat(Barr);
    cv::Mat C, D = cv::cvarrToMat(Darr);

    if( Carr )
        C = cv::cvarrToMat(Carr);

    CV_Assert_N( (D.rows == ((flags & CV_GEMM_A_T) == 0 ? A.rows : A.cols)),
               (D.cols == ((flags & CV_GEMM_B_T) == 0 ? B.cols : B.rows)),
               D.type() == A.type() );

    gemm( A, B, alpha, C, beta, D, flags );
}


CV_IMPL void
cvTransform( const CvArr* srcarr, CvArr* dstarr,
             const CvMat* transmat, const CvMat* shiftvec )
{
    cv::Mat m = cv::cvarrToMat(transmat), src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    if( shiftvec )
    {
        cv::Mat v = cv::cvarrToMat(shiftvec).reshape(1,m.rows),
            _m(m.rows, m.cols + 1, m.type()), m1 = _m.colRange(0,m.cols), v1 = _m.col(m.cols);
        m.convertTo(m1, m1.type());
        v.convertTo(v1, v1.type());
        m = _m;
    }

    CV_Assert_N( dst.depth() == src.depth(), dst.channels() == m.rows );
    cv::transform( src, dst, m );
}


CV_IMPL void
cvPerspectiveTransform( const CvArr* srcarr, CvArr* dstarr, const CvMat* mat )
{
    cv::Mat m = cv::cvarrToMat(mat), src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert_N( dst.type() == src.type(), dst.channels() == m.rows-1 );
    cv::perspectiveTransform( src, dst, m );
}


CV_IMPL void cvScaleAdd( const CvArr* srcarr1, CvScalar scale,
                         const CvArr* srcarr2, CvArr* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);

    CV_Assert_N( src1.size == dst.size, src1.type() == dst.type() );
    cv::scaleAdd( src1, scale.val[0], cv::cvarrToMat(srcarr2), dst );
}


CV_IMPL void
cvCalcCovarMatrix( const CvArr** vecarr, int count,
                   CvArr* covarr, CvArr* avgarr, int flags )
{
    cv::Mat cov0 = cv::cvarrToMat(covarr), cov = cov0, mean0, mean;
    CV_Assert_N( vecarr != 0, count >= 1 );

    if( avgarr )
        mean = mean0 = cv::cvarrToMat(avgarr);

    if( (flags & CV_COVAR_COLS) != 0 || (flags & CV_COVAR_ROWS) != 0 )
    {
        cv::Mat data = cv::cvarrToMat(vecarr[0]);
        cv::calcCovarMatrix( data, cov, mean, flags, cov.type() );
    }
    else
    {
        std::vector<cv::Mat> data(count);
        for( int i = 0; i < count; i++ )
            data[i] = cv::cvarrToMat(vecarr[i]);
        cv::calcCovarMatrix( &data[0], count, cov, mean, flags, cov.type() );
    }

    if( mean.data != mean0.data && mean0.data )
        mean.convertTo(mean0, mean0.type());

    if( cov.data != cov0.data )
        cov.convertTo(cov0, cov0.type());
}


CV_IMPL double
cvMahalanobis( const CvArr* srcAarr, const CvArr* srcBarr, const CvArr* matarr )
{
    return cv::Mahalanobis(cv::cvarrToMat(srcAarr),
        cv::cvarrToMat(srcBarr), cv::cvarrToMat(matarr));
}

CV_IMPL void
cvMulTransposed( const CvArr* srcarr, CvArr* dstarr,
                 int order, const CvArr* deltaarr, double scale )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0, delta;
    if( deltaarr )
        delta = cv::cvarrToMat(deltaarr);
    cv::mulTransposed( src, dst, order != 0, delta, scale, dst.type());
    if( dst.data != dst0.data )
        dst.convertTo(dst0, dst0.type());
}

CV_IMPL double cvDotProduct( const CvArr* srcAarr, const CvArr* srcBarr )
{
    return cv::cvarrToMat(srcAarr).dot(cv::cvarrToMat(srcBarr));
}


CV_IMPL void
cvCalcPCA( const CvArr* data_arr, CvArr* avg_arr, CvArr* eigenvals, CvArr* eigenvects, int flags )
{
    cv::Mat data = cv::cvarrToMat(data_arr), mean0 = cv::cvarrToMat(avg_arr);
    cv::Mat evals0 = cv::cvarrToMat(eigenvals), evects0 = cv::cvarrToMat(eigenvects);
    cv::Mat mean = mean0, evals = evals0, evects = evects0;

    cv::PCA pca;
    pca.mean = mean;
    pca.eigenvalues = evals;
    pca.eigenvectors = evects;

    pca(data, (flags & CV_PCA_USE_AVG) ? mean : cv::Mat(),
        flags, !evals.empty() ? evals.rows + evals.cols - 1 : 0);

    if( pca.mean.size() == mean.size() )
        pca.mean.convertTo( mean, mean.type() );
    else
    {
        cv::Mat temp; pca.mean.convertTo( temp, mean.type() );
        transpose( temp, mean );
    }

    evals = pca.eigenvalues;
    evects = pca.eigenvectors;
    int ecount0 = evals0.cols + evals0.rows - 1;
    int ecount = evals.cols + evals.rows - 1;

    CV_Assert_N( (evals0.cols == 1 || evals0.rows == 1),
                ecount0 <= ecount,
                evects0.cols == evects.cols,
                evects0.rows == ecount0 );

    cv::Mat temp = evals0;
    if( evals.rows == 1 )
        evals.colRange(0, ecount0).convertTo(temp, evals0.type());
    else
        evals.rowRange(0, ecount0).convertTo(temp, evals0.type());
    if( temp.data != evals0.data )
        transpose(temp, evals0);
    evects.rowRange(0, ecount0).convertTo( evects0, evects0.type() );

    // otherwise some datatypes or sizes were incorrect, so the output arrays have been reallocated
    CV_Assert( mean0.data == mean.data );
}


CV_IMPL void
cvProjectPCA( const CvArr* data_arr, const CvArr* avg_arr,
              const CvArr* eigenvects, CvArr* result_arr )
{
    cv::Mat data = cv::cvarrToMat(data_arr), mean = cv::cvarrToMat(avg_arr);
    cv::Mat evects = cv::cvarrToMat(eigenvects), dst0 = cv::cvarrToMat(result_arr), dst = dst0;

    cv::PCA pca;
    pca.mean = mean;
    int n;
    if( mean.rows == 1 )
    {
        CV_Assert_N(dst.cols <= evects.rows, dst.rows == data.rows);
        n = dst.cols;
    }
    else
    {
        CV_Assert_N(dst.rows <= evects.rows, dst.cols == data.cols);
        n = dst.rows;
    }
    pca.eigenvectors = evects.rowRange(0, n);

    cv::Mat result = pca.project(data);
    if( result.cols != dst.cols )
        result = result.reshape(1, 1);
    result.convertTo(dst, dst.type());

    CV_Assert(dst0.data == dst.data);
}


CV_IMPL void
cvBackProjectPCA( const CvArr* proj_arr, const CvArr* avg_arr,
                  const CvArr* eigenvects, CvArr* result_arr )
{
    cv::Mat data = cv::cvarrToMat(proj_arr), mean = cv::cvarrToMat(avg_arr);
    cv::Mat evects = cv::cvarrToMat(eigenvects), dst0 = cv::cvarrToMat(result_arr), dst = dst0;

    cv::PCA pca;
    pca.mean = mean;
    int n;
    if( mean.rows == 1 )
    {
        CV_Assert_N(data.cols <= evects.rows, dst.rows == data.rows);
        n = data.cols;
    }
    else
    {
        CV_Assert_N(data.rows <= evects.rows, dst.cols == data.cols);
        n = data.rows;
    }
    pca.eigenvectors = evects.rowRange(0, n);

    cv::Mat result = pca.backProject(data);
    result.convertTo(dst, dst.type());

    CV_Assert(dst0.data == dst.data);
}

#endif  // OPENCV_EXCLUDE_C_API

/* End of file. */