1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 #include "precomp.hpp"
45 #include "opencl_kernels_core.hpp"
46 #include "opencv2/core/opencl/runtime/opencl_clblas.hpp"
47 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
48 #include "intel_gpu_gemm.inl.hpp"
49
50 #include "matmul.simd.hpp"
51 #include "matmul.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
52
53 namespace cv
54 {
55
56 /****************************************************************************************\
57 * GEMM *
58 \****************************************************************************************/
59
60 #ifdef HAVE_CLAMDBLAS
61
// GEMM via AMD clBLAS: D = alpha*op(A)*op(B) + beta*op(C), where op() is an
// optional transposition selected by GEMM_1_T/GEMM_2_T/GEMM_3_T in 'flags'.
// Returns false (caller falls back to another implementation) when the inputs
// are unsuitable for clBLAS — element-misaligned offset/step, non-CL-buffer
// UMats — or when clBLAS itself fails. Only CV_32FC1/CV_64FC1/CV_32FC2/CV_64FC2
// are supported; any other type raises StsUnsupportedFormat.
static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
                              InputArray matC, double beta, OutputArray matD, int flags )
{
    int type = matA.type(), esz = CV_ELEM_SIZE(type);
    bool haveC = matC.kind() != cv::_InputArray::NONE;
    Size sizeA = matA.size(), sizeB = matB.size(), sizeC = haveC ? matC.size() : Size(0, 0);
    bool atrans = (flags & GEMM_1_T) != 0, btrans = (flags & GEMM_2_T) != 0, ctrans = (flags & GEMM_3_T) != 0;

    // Work with logical (post-transposition) sizes from here on.
    if (atrans)
        sizeA = Size(sizeA.height, sizeA.width);
    if (btrans)
        sizeB = Size(sizeB.height, sizeB.width);
    if (haveC && ctrans)
        sizeC = Size(sizeC.height, sizeC.width);

    Size sizeD(sizeB.width, sizeA.height);

    CV_Assert( matB.type() == type && (!haveC || matC.type() == type) );
    CV_Assert( sizeA.width == sizeB.height && (!haveC || sizeC == sizeD) );

    matD.create(sizeD, type);
    // clBLAS addresses buffers in whole elements, so offsets and strides must
    // be element-aligned; otherwise bail out to another implementation.
    if ( matA.offset() % esz != 0 || matA.step() % esz != 0 ||
         matB.offset() % esz != 0 || matB.step() % esz != 0 ||
         (haveC && (matC.offset() % esz != 0 || matC.step() % esz != 0)) )
        return false;

    UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
    // clBLAS requires plain cl_mem buffers (not images / SVM wrappers).
    if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D))
    {
        return false;
    }
    if (haveC)
    {
        UMat C = matC.getUMat();
        if (!ocl::internal::isCLBuffer(C))
            return false;
    }
    // Seed D with C (beta term) or zeros; clBLAS then accumulates into it.
    if (haveC)
        ctrans ? transpose(matC, D) : matC.copyTo(D);
    else
        D.setTo(Scalar::all(0));

    int M = sizeD.height, N = sizeD.width, K = sizeA.width;
    // Leading dimensions and offsets are expressed in elements, not bytes.
    int lda = (int)A.step / esz, ldb = (int)B.step / esz, ldc = (int)D.step / esz;
    int offa = (int)A.offset / esz, offb = (int)B.offset / esz, offc = (int)D.offset / esz;

    cl_command_queue clq = (cl_command_queue)ocl::Queue::getDefault().ptr();
    clblasTranspose transA = atrans ? clblasTrans : clblasNoTrans;
    clblasTranspose transB = btrans ? clblasTrans : clblasNoTrans;
    clblasOrder order = clblasRowMajor;
    clblasStatus status = clblasSuccess;

    if (type == CV_32FC1)
        status = clblasSgemm(order, transA, transB, M, N, K,
                             (cl_float)alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             (cl_float)beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    else if (type == CV_64FC1)
        status = clblasDgemm(order, transA, transB, M, N, K,
                             alpha, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             beta, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    else if (type == CV_32FC2)
    {
        // Complex types: alpha/beta are passed with zero imaginary part.
        cl_float2 alpha_2 = { { (cl_float)alpha, 0 } };
        cl_float2 beta_2  = { { (cl_float)beta, 0 } };
        status = clblasCgemm(order, transA, transB, M, N, K,
                             alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    }
    else if (type == CV_64FC2)
    {
        cl_double2 alpha_2 = { { alpha, 0 } };
        cl_double2 beta_2  = { { beta, 0 } };
        status = clblasZgemm(order, transA, transB, M, N, K,
                             alpha_2, (const cl_mem)A.handle(ACCESS_READ), offa, lda,
                             (const cl_mem)B.handle(ACCESS_READ), offb, ldb,
                             beta_2, (cl_mem)D.handle(ACCESS_RW), offc, ldc,
                             1, &clq, 0, NULL, NULL);
    }
    else
        CV_Error(Error::StsUnsupportedFormat, "");

    return status == clblasSuccess;
}
151
152 #endif
153
154 #ifdef HAVE_OPENCL
// OpenCL GEMM: D = alpha*op(A)*op(B) + beta*op(C).
// Two paths: a generic OpenCL kernel ("gemm" from gemm_oclsrc), and a
// specialized Intel-subgroup path (intel_gpu_gemm) used for single-channel
// float matrices on Intel devices. Returns false so the caller can fall back
// to the CPU implementation.
static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
                      InputArray matC, double beta, OutputArray matD, int flags )
{
    int depth = matA.depth(), cn = matA.channels();
    int type = CV_MAKETYPE(depth, cn);

    CV_Assert_N( type == matB.type(), (type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2) );

    const ocl::Device & dev = ocl::Device::getDefault();
    bool doubleSupport = dev.doubleFPConfig() > 0;

    // Give up early if the device cannot do double-precision math.
    if (!doubleSupport && depth == CV_64F)
        return false;

    bool haveC = matC.kind() != cv::_InputArray::NONE;
    Size sizeA = matA.size(), sizeB = matB.size(), sizeC = haveC ? matC.size() : Size(0, 0);
    bool atrans = (flags & GEMM_1_T) != 0, btrans = (flags & GEMM_2_T) != 0, ctrans = (flags & GEMM_3_T) != 0;

    CV_Assert( !haveC || matC.type() == type );

    Size sizeD(((btrans)? sizeB.height : sizeB.width),
               ((atrans)? sizeA.width : sizeA.height));
    matD.create(sizeD, type);

    UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();


    // Generic kernel path: used when the Intel subgroup extension is absent
    // or the data is double / multi-channel.
    if (!dev.intelSubgroupsSupport() || (depth == CV_64F) || cn != 1)
    {
        String opts;

        // Switch to logical (post-transposition) sizes for validation.
        if (atrans)
            sizeA = Size(sizeA.height, sizeA.width);
        if (btrans)
            sizeB = Size(sizeB.height, sizeB.width);
        if (haveC && ctrans)
            sizeC = Size(sizeC.height, sizeC.width);

        CV_Assert( sizeA.width == sizeB.height && (!haveC || sizeC == sizeD) );

        // Pick the largest square work-group tile (32/16/8/1) that fits the
        // device's work-group size limit for this channel count.
        int max_wg_size = (int)dev.maxWorkGroupSize();
        int block_size = (max_wg_size / (32*cn) < 32) ? (max_wg_size / (16*cn) < 16) ? (max_wg_size / (8*cn) < 8) ? 1 : 8 : 16 : 32;

        // The kernel itself does not transpose; materialize transposed inputs.
        if (atrans)
            A = A.t();

        if (btrans)
            B = B.t();

        // Pre-load the beta term into D; the kernel accumulates onto it.
        if (haveC)
            ctrans ? transpose(matC, D) : matC.copyTo(D);

        int vectorWidths[] = { 4, 4, 2, 2, 1, 4, cn, -1 };
        int kercn = ocl::checkOptimalVectorWidth(vectorWidths, B, D);

        // NO_MULT: K is not a multiple of the tile size, kernel must guard
        // the tail iterations.
        opts += format(" -D T=%s -D T1=%s -D WT=%s -D cn=%d -D kercn=%d -D LOCAL_SIZE=%d%s%s%s",
                          ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(CV_MAKETYPE(depth, kercn)),
                          cn, kercn, block_size,
                          (sizeA.width % block_size !=0) ? " -D NO_MULT" : "",
                          haveC ? " -D HAVE_C" : "",
                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");

        ocl::Kernel k("gemm", cv::ocl::core::gemm_oclsrc, opts);
        if (k.empty())
            return false;

        // alpha/beta are passed at the kernel's working precision.
        if (depth == CV_64F)
            k.args(ocl::KernelArg::ReadOnlyNoSize(A),
                   ocl::KernelArg::ReadOnlyNoSize(B, cn, kercn),
                   ocl::KernelArg::ReadWrite(D, cn, kercn),
                   sizeA.width, alpha, beta);
        else
            k.args(ocl::KernelArg::ReadOnlyNoSize(A),
                   ocl::KernelArg::ReadOnlyNoSize(B, cn, kercn),
                   ocl::KernelArg::ReadWrite(D, cn, kercn),
                   sizeA.width, (float)alpha, (float)beta);

        size_t globalsize[2] = { (size_t)sizeD.width * cn / kercn, (size_t)sizeD.height};
        size_t localsize[2] = { (size_t)block_size, (size_t)block_size};

        return k.run(2, globalsize, block_size!=1 ? localsize : NULL, false);
    }
    else
    {
        // Intel subgroup path (CV_32FC1 only at this point).
        // Seed D with C when the beta term contributes; otherwise force beta
        // to 0 so intel_gpu_gemm ignores D's initial contents.
        if (haveC && beta != 0.0)
        {
            ctrans ? transpose(matC, D) : matC.copyTo(D);
        }
        else
        {
            beta = 0.0;
        }

        return intel_gpu_gemm(A, sizeA,
                              B, sizeB,
                              D, sizeD,
                              alpha,
                              beta,
                              atrans, btrans);
    }
}
256 #endif
257
258
259 namespace hal {
260
// Single-precision real GEMM dispatcher: dst = alpha*op(src1)*op(src2) + beta*op(src3).
// Tries an external HAL implementation first (CALL_HAL returns on success),
// then dispatches to the best available CPU-optimized variant.
// Steps are in bytes; m_a x n_a is the (pre-transposition) size of src1,
// n_d the number of dst columns; flags carries the CV_HAL_GEMM_*_T bits.
void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
             float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
             int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm32f, cv_hal_gemm32f, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm32f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm32f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}
274
// Double-precision real GEMM dispatcher; see gemm32f for the parameter
// conventions. HAL hook first, then CPU-dispatched implementation.
void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
             double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
             int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm64f, cv_hal_gemm64f, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm64f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm64f, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}
288
// Single-precision complex GEMM dispatcher (data is interleaved re/im float
// pairs); see gemm32f for the parameter conventions.
void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
              float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
              int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm32fc, cv_hal_gemm32fc, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm32fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm32fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}
302
// Double-precision complex GEMM dispatcher (interleaved re/im double pairs);
// see gemm32f for the parameter conventions.
void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
              double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
              int m_a, int n_a, int n_d, int flags)
{
    CV_INSTRUMENT_REGION();
    CALL_HAL(gemm64fc, cv_hal_gemm64fc, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
#ifdef CV_GEMM_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(gemm64fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags));
#else
    CV_CPU_DISPATCH(gemm64fc, (src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}
316
317 } // namespace hal
318
gemm(InputArray matA,InputArray matB,double alpha,InputArray matC,double beta,OutputArray _matD,int flags)319 void gemm(InputArray matA, InputArray matB, double alpha,
320 InputArray matC, double beta, OutputArray _matD, int flags)
321 {
322 #ifdef HAVE_CLAMDBLAS
323 CV_OCL_RUN(ocl::haveAmdBlas() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2 && _matD.isUMat() &&
324 matA.cols() > 20 && matA.rows() > 20 && matB.cols() > 20, // since it works incorrect for small sizes
325 ocl_gemm_amdblas(matA, matB, alpha, matC, beta, _matD, flags))
326 #endif
327
328 #ifdef HAVE_OPENCL
329 CV_OCL_RUN(_matD.isUMat() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2,
330 ocl_gemm(matA, matB, alpha, matC, beta, _matD, flags))
331 #endif
332
333 Mat A = matA.getMat(), B = matB.getMat(), C = beta != 0.0 ? matC.getMat() : Mat();
334 Size a_size = A.size(), d_size;
335 int len = 0, type = A.type();
336
337 CV_Assert_N( type == B.type(), (type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2) );
338
339 switch( flags & (GEMM_1_T|GEMM_2_T) )
340 {
341 case 0:
342 d_size = Size( B.cols, a_size.height );
343 len = B.rows;
344 CV_Assert( a_size.width == len );
345 break;
346 case 1:
347 d_size = Size( B.cols, a_size.width );
348 len = B.rows;
349 CV_Assert( a_size.height == len );
350 break;
351 case 2:
352 d_size = Size( B.rows, a_size.height );
353 len = B.cols;
354 CV_Assert( a_size.width == len );
355 break;
356 case 3:
357 d_size = Size( B.rows, a_size.width );
358 len = B.cols;
359 CV_Assert( a_size.height == len );
360 break;
361 }
362
363 if( !C.empty() )
364 {
365 CV_Assert_N( C.type() == type,
366 (((flags&GEMM_3_T) == 0 && C.rows == d_size.height && C.cols == d_size.width) ||
367 ((flags&GEMM_3_T) != 0 && C.rows == d_size.width && C.cols == d_size.height)));
368 }
369
370 _matD.create( d_size.height, d_size.width, type );
371 Mat D = _matD.getMat();
372 if( (flags & GEMM_3_T) != 0 && C.data == D.data )
373 {
374 transpose( C, C );
375 flags &= ~GEMM_3_T;
376 }
377
378 Mat *DProxyPtr = &D, DProxy;
379 if( D.data == A.data || D.data == B.data )
380 {
381 DProxy = Mat(d_size.height, d_size.width, D.type());
382 DProxyPtr = &DProxy;
383 }
384
385 if( type == CV_32FC1 )
386 hal::gemm32f(A.ptr<float>(), A.step, B.ptr<float>(), B.step, static_cast<float>(alpha),
387 C.ptr<float>(), C.step, static_cast<float>(beta),
388 DProxyPtr->ptr<float>(), DProxyPtr->step,
389 a_size.height, a_size.width, DProxyPtr->cols, flags);
390 else if( type == CV_64FC1 )
391 hal::gemm64f(A.ptr<double>(), A.step, B.ptr<double>(), B.step, alpha,
392 C.ptr<double>(), C.step, beta,
393 DProxyPtr->ptr<double>(), DProxyPtr->step,
394 a_size.height, a_size.width, DProxyPtr->cols, flags);
395 else if( type == CV_32FC2 )
396 hal::gemm32fc(A.ptr<float>(), A.step, B.ptr<float>(), B.step, static_cast<float>(alpha),
397 C.ptr<float>(), C.step, static_cast<float>(beta),
398 DProxyPtr->ptr<float>(), DProxyPtr->step,
399 a_size.height, a_size.width, DProxyPtr->cols, flags);
400 else
401 {
402 CV_Assert( type == CV_64FC2 );
403 hal::gemm64fc(A.ptr<double>(), A.step, B.ptr<double>(), B.step, alpha,
404 C.ptr<double>(), C.step, beta,
405 D.ptr<double>(), D.step,
406 a_size.height, a_size.width, DProxyPtr->cols, flags);
407 }
408
409 if(DProxyPtr != &D)
410 DProxyPtr->copyTo(D);
411 }
412
413
414
415 /****************************************************************************************\
416 * Transform *
417 \****************************************************************************************/
418
// Returns the CPU-dispatched per-plane transform kernel (full matrix case)
// for the given element depth.
static TransformFunc getTransformFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getTransformFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}
425
// Returns the CPU-dispatched transform kernel specialized for (nearly)
// diagonal matrices — per-channel scale + shift — for the given depth.
static TransformFunc getDiagTransformFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getDiagTransformFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}
432
// Applies an affine channel transform to every element of _src:
// dst(x,y) = m * [src(x,y); 1] (if m has scn+1 columns) or m * src(x,y).
// The matrix is normalized to a contiguous double/float buffer of shape
// dcn x (scn+1); 1-channel and diagonal matrices take fast paths.
void transform(InputArray _src, OutputArray _dst, InputArray _mtx)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), m = _mtx.getMat();
    int depth = src.depth(), scn = src.channels(), dcn = m.rows;
    // m may either omit or include the translation column.
    CV_Assert( scn == m.cols || scn + 1 == m.cols );
    bool isDiag = false;

    _dst.create( src.size(), CV_MAKETYPE(depth, dcn) );
    Mat dst = _dst.getMat();

    if (src.data == dst.data) // inplace case
    {
        CV_Assert(scn == dcn);
        src = src.clone(); // TODO Add performance warning
    }

    // Working precision of the matrix: double for CV_32S/CV_64F input,
    // float otherwise.
    int mtype = depth == CV_32S || depth == CV_64F ? CV_64F : CV_32F;
    AutoBuffer<double> _mbuf;
    double* mbuf;

    // Repack m into a contiguous dcn x (scn+1) buffer of type mtype, padding
    // a missing translation column with zeros.
    if( !m.isContinuous() || m.type() != mtype || m.cols != scn + 1 )
    {
        _mbuf.allocate(dcn*(scn+1));
        mbuf = _mbuf.data();
        Mat tmp(dcn, scn+1, mtype, mbuf);
        memset(tmp.ptr(), 0, tmp.total()*tmp.elemSize());
        if( m.cols == scn+1 )
            m.convertTo(tmp, mtype);
        else
        {
            Mat tmppart = tmp.colRange(0, m.cols);
            m.convertTo(tmppart, mtype);
        }
        m = tmp;
    }
    else
        mbuf = m.ptr<double>();

    if( scn == dcn )
    {
        int i, j;
        double eps = mtype == CV_32F ? FLT_EPSILON : DBL_EPSILON;

        // 1-channel case degenerates to convertTo with scale/shift.
        if( scn == 1 )
        {
            double alpha, beta;
            if( mtype == CV_32F )
                alpha = m.at<float>(0), beta = m.at<float>(1);
            else
                alpha = m.at<double>(0), beta = m.at<double>(1);
            src.convertTo(dst, dst.type(), alpha, beta);
            return;
        }

        // Detect an (effectively) diagonal square part of m, which enables
        // the cheaper per-channel scale+shift kernel.
        for( i = 0, isDiag = true; isDiag && i < scn; i++ )
        {
            for( j = 0; isDiag && j < scn; j++ )
            {
                double v = mtype == CV_32F ? m.at<float>(i, j) : m.at<double>(i, j);
                if( i != j && fabs(v) > eps )
                    isDiag = false;
            }
        }
    }

    TransformFunc func = isDiag ? getDiagTransformFunc(depth): getTransformFunc(depth);
    CV_Assert( func != 0 );

    // Process plane by plane so non-continuous / multi-dimensional inputs work.
    const Mat* arrays[] = {&src, &dst, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t i, total = it.size;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], (uchar*)mbuf, (int)total, scn, dcn );
}
511
512
513
514 /****************************************************************************************\
515 * Perspective Transform *
516 \****************************************************************************************/
517
// Returns the CPU-dispatched perspective-transform kernel for the given depth.
static TransformFunc getPerspectiveTransform(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getPerspectiveTransform, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}
524
// Applies a perspective transform to every scn-channel element of _src using
// the (scn+1) x (scn+1) matrix _mtx: each vector is extended with 1,
// multiplied by m, and divided by the resulting homogeneous coordinate.
// Only CV_32F/CV_64F inputs are supported; dst has m.rows-1 channels.
void perspectiveTransform(InputArray _src, OutputArray _dst, InputArray _mtx)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), m = _mtx.getMat();
    int depth = src.depth(), scn = src.channels(), dcn = m.rows-1;
    CV_Assert( scn + 1 == m.cols );
    CV_Assert( depth == CV_32F || depth == CV_64F );

    _dst.create( src.size(), CV_MAKETYPE(depth, dcn) );
    Mat dst = _dst.getMat();

    const int mtype = CV_64F;
    AutoBuffer<double> _mbuf;
    // NOTE(review): mbuf is taken from m before the type check below; it is
    // only used as-is when m is already a continuous CV_64F matrix, otherwise
    // it is replaced by the converted copy.
    double* mbuf = m.ptr<double>();

    // Normalize the matrix to a contiguous double buffer.
    if( !m.isContinuous() || m.type() != mtype )
    {
        _mbuf.allocate((dcn+1)*(scn+1));
        mbuf = _mbuf.data();
        Mat tmp(dcn+1, scn+1, mtype, mbuf);
        m.convertTo(tmp, mtype);
        m = tmp;
    }

    TransformFunc func = getPerspectiveTransform(depth);
    CV_Assert( func != 0 );

    // Plane-wise iteration handles non-continuous and multi-dim inputs.
    const Mat* arrays[] = {&src, &dst, 0};
    uchar* ptrs[2] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t i, total = it.size;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], (uchar*)mbuf, (int)total, scn, dcn );
}
561
562 /****************************************************************************************\
563 * ScaleAdd *
564 \****************************************************************************************/
565
566 #ifdef HAVE_OPENCL
567
// OpenCL implementation of scaleAdd: dst = alpha*src1 + src2.
// Builds the generic "KF" arithmetic kernel with OP_SCALE_ADD; returns false
// (CPU fallback) when doubles are needed but unsupported, when the source
// sizes differ, or when the kernel cannot be built.
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{
    const ocl::Device & d = ocl::Device::getDefault();

    bool doubleSupport = d.doubleFPConfig() > 0;
    Size size = _src1.size();
    int depth = CV_MAT_DEPTH(type);
    if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
        return false;

    _dst.create(size, type);
    // Work in at least float precision; wider vectors / multiple rows per
    // work-item (on Intel) improve throughput.
    int cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
    int kercn = ocl::predictOptimalVectorWidthMax(_src1, _src2, _dst),
        rowsPerWI = d.isIntel() ? 4 : 1;

    // cvt[0]/cvt[1]: generated conversion snippets src->work and work->dst.
    char cvt[2][50];
    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
                  format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D workT=%s -D convertToWT1=%s"
                         " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s"
                         " -D wdepth=%d%s -D rowsPerWI=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth,
                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
                         ocl::convertTypeStr(wdepth, depth, kercn, cvt[1]),
                         ocl::typeToStr(wdepth), wdepth,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI));
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);

    // Pass alpha at the kernel's working precision.
    if (wdepth == CV_32F)
        k.args(src1arg, src2arg, dstarg, (float)alpha);
    else
        k.args(src1arg, src2arg, dstarg, alpha);

    size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}
611
612 #endif
613
// Returns the CPU-dispatched scaleAdd kernel for the given element depth.
static ScaleAddFunc getScaleAddFunc(int depth)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getScaleAddFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
}
620
// Computes _dst = alpha*_src1 + _src2 element-wise (a.k.a. AXPY).
// Tries OpenCL first; integer depths are delegated to addWeighted; otherwise
// a CPU-dispatched kernel runs over continuous data or plane by plane.
void scaleAdd(InputArray _src1, double alpha, InputArray _src2, OutputArray _dst)
{
    CV_INSTRUMENT_REGION();

    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    CV_Assert( type == _src2.type() );

    CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(),
            ocl_scaleAdd(_src1, alpha, _src2, _dst, type))

    // Integer inputs: addWeighted covers the saturating arithmetic.
    if( depth < CV_32F )
    {
        addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
        return;
    }

    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    CV_Assert(src1.size == src2.size);

    _dst.create(src1.dims, src1.size, type);
    Mat dst = _dst.getMat();

    // The kernel takes alpha through a void* at the matching precision.
    float falpha = (float)alpha;
    void* palpha = depth == CV_32F ? (void*)&falpha : (void*)&alpha;

    ScaleAddFunc func = getScaleAddFunc(depth);
    CV_Assert(func);

    // Fast path: one flat call when all three matrices are continuous.
    if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous())
    {
        size_t len = src1.total()*cn;
        func(src1.ptr(), src2.ptr(), dst.ptr(), (int)len, palpha);
        return;
    }

    const Mat* arrays[] = {&src1, &src2, &dst, 0};
    uchar* ptrs[3] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t i, len = it.size*cn;

    for( i = 0; i < it.nplanes; i++, ++it )
        func( ptrs[0], ptrs[1], ptrs[2], (int)len, palpha );
}
664
665 /****************************************************************************************\
666 * Covariation Matrix *
667 \****************************************************************************************/
668
// Legacy overload: computes the covariance matrix of 'nsamples' equally-sized
// images/matrices given as a C array. Each sample is flattened into one row
// of a temporary nsamples x (w*h) matrix, and the work is delegated to the
// InputArray overload with CV_COVAR_ROWS forced on. '_mean' is an input when
// CV_COVAR_USE_AVG is set, otherwise it receives the computed mean.
void calcCovarMatrix( const Mat* data, int nsamples, Mat& covar, Mat& _mean, int flags, int ctype )
{
    CV_INSTRUMENT_REGION();

    CV_Assert_N( data, nsamples > 0 );
    Size size = data[0].size();
    int sz = size.width * size.height, esz = (int)data[0].elemSize();
    int type = data[0].type();
    Mat mean;
    // Output depth: at least CV_32F, honoring both the requested ctype and
    // the depth of a user-provided mean.
    ctype = std::max(std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), _mean.depth()), CV_32F);

    if( (flags & CV_COVAR_USE_AVG) != 0 )
    {
        CV_Assert( _mean.size() == size );
        if( _mean.isContinuous() && _mean.type() == ctype )
            mean = _mean.reshape(1, 1);
        else
        {
            _mean.convertTo(mean, ctype);
            mean = mean.reshape(1, 1);
        }
    }

    // Flatten every sample into one row of _data.
    Mat _data(nsamples, sz, type);

    for( int i = 0; i < nsamples; i++ )
    {
        CV_Assert_N( data[i].size() == size, data[i].type() == type );
        if( data[i].isContinuous() )
            memcpy( _data.ptr(i), data[i].ptr(), sz*esz );
        else
        {
            Mat dataRow(size.height, size.width, type, _data.ptr(i));
            data[i].copyTo(dataRow);
        }
    }

    calcCovarMatrix( _data, covar, mean, (flags & ~(CV_COVAR_ROWS|CV_COVAR_COLS)) | CV_COVAR_ROWS, ctype );
    // Reshape the computed mean back to the original sample geometry.
    if( (flags & CV_COVAR_USE_AVG) == 0 )
        _mean = mean.reshape(1, size.height);
}
710
// Computes the covariance matrix of a set of samples. The samples are either
// a vector of equally-sized matrices (flattened to rows) or a single matrix
// whose rows (CV_COVAR_ROWS) or columns (CV_COVAR_COLS) are the samples.
// The heavy lifting — (data-mean)^T*(data-mean) or its transpose — is done
// by mulTransposed; CV_COVAR_SCALE divides by the number of samples.
void calcCovarMatrix( InputArray _src, OutputArray _covar, InputOutputArray _mean, int flags, int ctype )
{
    CV_INSTRUMENT_REGION();

    if(_src.kind() == _InputArray::STD_VECTOR_MAT || _src.kind() == _InputArray::STD_ARRAY_MAT)
    {
        std::vector<cv::Mat> src;
        _src.getMatVector(src);

        CV_Assert( src.size() > 0 );

        Size size = src[0].size();
        int type = src[0].type();

        // Output depth: at least CV_32F, honoring ctype and the mean's depth.
        ctype = std::max(std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), _mean.depth()), CV_32F);

        // Flatten each sample matrix into one row.
        Mat _data(static_cast<int>(src.size()), size.area(), type);

        int i = 0;
        for(std::vector<cv::Mat>::iterator each = src.begin(); each != src.end(); ++each, ++i )
        {
            CV_Assert_N( (*each).size() == size, (*each).type() == type );
            Mat dataRow(size.height, size.width, type, _data.ptr(i));
            (*each).copyTo(dataRow);
        }

        Mat mean;
        if( (flags & CV_COVAR_USE_AVG) != 0 )
        {
            CV_Assert( _mean.size() == size );

            // NOTE(review): 'mean' was default-constructed just above, so
            // this condition compares an empty Mat's type against ctype and
            // looks always-true in practice; the branch converts the provided
            // mean to ctype in-place. Verify intent before changing.
            if( mean.type() != ctype )
            {
                mean = _mean.getMat();
                _mean.create(mean.size(), ctype);
                Mat tmp = _mean.getMat();
                mean.convertTo(tmp, ctype);
                mean = tmp;
            }

            mean = _mean.getMat().reshape(1, 1);
        }

        calcCovarMatrix( _data, _covar, mean, (flags & ~(CV_COVAR_ROWS|CV_COVAR_COLS)) | CV_COVAR_ROWS, ctype );

        // Return the computed mean, reshaped back to the sample geometry.
        if( (flags & CV_COVAR_USE_AVG) == 0 )
        {
            mean = mean.reshape(1, size.height);
            mean.copyTo(_mean);
        }
        return;
    }

    Mat data = _src.getMat(), mean;
    // Exactly one of CV_COVAR_ROWS / CV_COVAR_COLS must be set.
    CV_Assert( ((flags & CV_COVAR_ROWS) != 0) ^ ((flags & CV_COVAR_COLS) != 0) );
    bool takeRows = (flags & CV_COVAR_ROWS) != 0;
    int type = data.type();
    int nsamples = takeRows ? data.rows : data.cols;
    CV_Assert( nsamples > 0 );
    Size size = takeRows ? Size(data.cols, 1) : Size(1, data.rows);

    if( (flags & CV_COVAR_USE_AVG) != 0 )
    {
        // Caller supplied the mean: validate and convert it to ctype.
        mean = _mean.getMat();
        ctype = std::max(std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), mean.depth()), CV_32F);
        CV_Assert( mean.size() == size );
        if( mean.type() != ctype )
        {
            _mean.create(mean.size(), ctype);
            Mat tmp = _mean.getMat();
            mean.convertTo(tmp, ctype);
            mean = tmp;
        }
    }
    else
    {
        // Compute the mean along the sample axis.
        ctype = std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), CV_32F);
        reduce( _src, _mean, takeRows ? 0 : 1, CV_REDUCE_AVG, ctype );
        mean = _mean.getMat();
    }

    mulTransposed( data, _covar, ((flags & CV_COVAR_NORMAL) == 0) ^ takeRows,
        mean, (flags & CV_COVAR_SCALE) != 0 ? 1./nsamples : 1, ctype );
}
795
796
797
798 /****************************************************************************************\
799 * Mahalanobis *
800 \****************************************************************************************/
801
// Returns the CPU-dispatched Mahalanobis-distance kernel for the given depth
// (baseline-only build variant selected by CV_MAHALANOBIS_BASELINE_ONLY).
static MahalanobisImplFunc getMahalanobisImplFunc(int depth)
{
#ifdef CV_MAHALANOBIS_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(getMahalanobisImplFunc, (depth));
#else
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getMahalanobisImplFunc, (depth),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}
812
813
Mahalanobis(InputArray _v1,InputArray _v2,InputArray _icovar)814 double Mahalanobis(InputArray _v1, InputArray _v2, InputArray _icovar)
815 {
816 CV_INSTRUMENT_REGION();
817
818 Mat v1 = _v1.getMat(), v2 = _v2.getMat(), icovar = _icovar.getMat();
819 int type = v1.type(), depth = v1.depth();
820 Size sz = v1.size();
821 int len = sz.width*sz.height*v1.channels();
822 AutoBuffer<double> buf(len);
823
824 CV_Assert_N( type == v2.type(), type == icovar.type(),
825 sz == v2.size(), len == icovar.rows && len == icovar.cols );
826
827 sz.width *= v1.channels();
828 if( v1.isContinuous() && v2.isContinuous() )
829 {
830 sz.width *= sz.height;
831 sz.height = 1;
832 }
833
834 MahalanobisImplFunc func = getMahalanobisImplFunc(depth);
835 CV_Assert(func);
836
837 double result = func(v1, v2, icovar, buf.data(), len);
838 return std::sqrt(result);
839 }
840
841
842
843 /****************************************************************************************\
844 * MulTransposed *
845 \****************************************************************************************/
846
// Returns the CPU-dispatched mulTransposed kernel for the given source/dest
// type combination; 'ata' selects A^T*A versus A*A^T. May return null for
// unsupported combinations (caller checks).
static MulTransposedFunc getMulTransposedFunc(int stype, int dtype, bool ata)
{
#ifdef CV_MULTRANSPOSED_BASELINE_ONLY
    CV_CPU_CALL_BASELINE(getMulTransposedFunc, (stype, dtype, ata));
#else
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getMulTransposedFunc, (stype, dtype, ata),
        CV_CPU_DISPATCH_MODES_ALL);
#endif
}
857
// Computes _dst = scale * (src - delta)^T * (src - delta) when ata is true,
// or scale * (src - delta) * (src - delta)^T otherwise. 'delta' may be empty,
// a full-size matrix, or a single row/column that is repeated over src.
// Large matrices (and in-place calls) are routed through gemm(); otherwise a
// direct kernel computes one triangle and completeSymm mirrors it.
void mulTransposed(InputArray _src, OutputArray _dst, bool ata,
                   InputArray _delta, double scale, int dtype)
{
    CV_INSTRUMENT_REGION();

    Mat src = _src.getMat(), delta = _delta.getMat();
    const int gemm_level = 100; // boundary above which GEMM is faster.
    int stype = src.type();
    // Destination depth: at least CV_32F, honoring dtype and delta's depth.
    dtype = std::max(std::max(CV_MAT_DEPTH(dtype >= 0 ? dtype : stype), delta.depth()), CV_32F);
    CV_Assert( src.channels() == 1 );

    if( !delta.empty() )
    {
        CV_Assert_N( delta.channels() == 1,
            (delta.rows == src.rows || delta.rows == 1),
            (delta.cols == src.cols || delta.cols == 1));
        if( delta.type() != dtype )
            delta.convertTo(delta, dtype);
    }

    int dsize = ata ? src.cols : src.rows;
    _dst.create( dsize, dsize, dtype );
    Mat dst = _dst.getMat();

    // GEMM path: required for in-place operation, preferred for big matrices
    // of matching type.
    if( src.data == dst.data || (stype == dtype &&
        (dst.cols >= gemm_level && dst.rows >= gemm_level &&
         src.cols >= gemm_level && src.rows >= gemm_level)))
    {
        Mat src2;
        const Mat* tsrc = &src;
        if( !delta.empty() )
        {
            // Subtract delta first, tiling it over src if it is a single
            // row/column.
            if( delta.size() == src.size() )
                subtract( src, delta, src2 );
            else
            {
                repeat(delta, src.rows/delta.rows, src.cols/delta.cols, src2);
                subtract( src, src2, src2 );
            }
            tsrc = &src2;
        }
        gemm( *tsrc, *tsrc, scale, Mat(), 0, dst, ata ? GEMM_1_T : GEMM_2_T );
    }
    else
    {
        // Direct kernel: computes one triangle, then mirror to full symmetry.
        MulTransposedFunc func = getMulTransposedFunc(stype, dtype, ata);
        if( !func )
            CV_Error( CV_StsUnsupportedFormat, "" );

        func( src, dst, delta, scale );
        completeSymm( dst, false );
    }
}
911
912 /****************************************************************************************\
913 * Dot Product *
914 \****************************************************************************************/
915
// Thin per-depth dot-product wrappers. Each forwards to the SIMD
// implementation selected at runtime by CV_CPU_DISPATCH.
static double dotProd_8u(const uchar* src1, const uchar* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_8u, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_8s(const schar* src1, const schar* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_8s, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_16u, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_16s(const short* src1, const short* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_16s, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_32s(const int* src1, const int* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_32s, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_32f(const float* src1, const float* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_32f, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
static double dotProd_64f(const double* src1, const double* src2, int len)
{
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(dotProd_64f, (src1, src2, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
958
959 typedef double (*DotProdFunc)(const uchar* src1, const uchar* src2, int len);
960
getDotProdFunc(int depth)961 static DotProdFunc getDotProdFunc(int depth)
962 {
963 static DotProdFunc dotProdTab[] =
964 {
965 (DotProdFunc)GET_OPTIMIZED(dotProd_8u), (DotProdFunc)GET_OPTIMIZED(dotProd_8s),
966 (DotProdFunc)dotProd_16u, (DotProdFunc)dotProd_16s,
967 (DotProdFunc)dotProd_32s, (DotProdFunc)GET_OPTIMIZED(dotProd_32f),
968 (DotProdFunc)dotProd_64f, 0
969 };
970
971 return dotProdTab[depth];
972 }
973
dot(InputArray _mat) const974 double Mat::dot(InputArray _mat) const
975 {
976 CV_INSTRUMENT_REGION();
977
978 Mat mat = _mat.getMat();
979 int cn = channels();
980 DotProdFunc func = getDotProdFunc(depth());
981 CV_Assert_N( mat.type() == type(), mat.size == size, func != 0 );
982
983 if( isContinuous() && mat.isContinuous() )
984 {
985 size_t len = total()*cn;
986 if( len == (size_t)(int)len )
987 return func(data, mat.data, (int)len);
988 }
989
990 const Mat* arrays[] = {this, &mat, 0};
991 uchar* ptrs[2] = {};
992 NAryMatIterator it(arrays, ptrs);
993 int len = (int)(it.size*cn);
994 double r = 0;
995
996 for( size_t i = 0; i < it.nplanes; i++, ++it )
997 r += func( ptrs[0], ptrs[1], len );
998
999 return r;
1000 }
1001
1002
1003 #ifdef HAVE_OPENCL
1004
// OpenCL dot product: each compute unit reduces its share of the inputs to a
// partial sum in a small device buffer, which is then summed on the host.
// Returns false when no suitable kernel can be built or run.
static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    // Work on single-channel views of both inputs.
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    int type = src1.type(), depth = CV_MAT_DEPTH(type),
        kercn = ocl::predictOptimalVectorWidth(src1, src2);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    // CV_64F needs device-side double support.
    if ( !doubleSupport && depth == CV_64F )
        return false;

    int dbsize = ocl::Device::getDefault().maxComputeUnits();  // one partial sum per unit
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
    int ddepth = std::max(CV_32F, depth);  // accumulate in at least float

    // Largest power of two strictly below the max workgroup size.
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    // Build the generic reduction kernel specialized for OP_DOT.
    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    // Device buffer holding one partial sum per compute unit.
    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    size_t globalsize = dbsize * wgs;
    if (k.run(1, &globalsize, &wgs, true))
    {
        // Final reduction of the per-unit partial sums on the host.
        res = sum(db.getMat(ACCESS_READ))[0];
        return true;
    }
    return false;
}
1054
1055 #endif
1056
// Dot product for UMat: tries the OpenCL path for 2D (or smaller) matrices
// and falls back to the CPU Mat::dot implementation otherwise.
double UMat::dot(InputArray m) const
{
    CV_INSTRUMENT_REGION();

    CV_Assert(m.sameSize(*this) && m.type() == type());

#ifdef HAVE_OPENCL
    // CV_OCL_RUN_ returns r from this function when ocl_dot succeeds.
    double r = 0;
    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
#endif

    return getMat(ACCESS_READ).dot(m);
}
1070
1071 } // namespace cv::
1072
1073
1074 #ifndef OPENCV_EXCLUDE_C_API
1075 /****************************************************************************************\
1076 * Earlier API *
1077 \****************************************************************************************/
1078
cvGEMM(const CvArr * Aarr,const CvArr * Barr,double alpha,const CvArr * Carr,double beta,CvArr * Darr,int flags)1079 CV_IMPL void cvGEMM( const CvArr* Aarr, const CvArr* Barr, double alpha,
1080 const CvArr* Carr, double beta, CvArr* Darr, int flags )
1081 {
1082 cv::Mat A = cv::cvarrToMat(Aarr), B = cv::cvarrToMat(Barr);
1083 cv::Mat C, D = cv::cvarrToMat(Darr);
1084
1085 if( Carr )
1086 C = cv::cvarrToMat(Carr);
1087
1088 CV_Assert_N( (D.rows == ((flags & CV_GEMM_A_T) == 0 ? A.rows : A.cols)),
1089 (D.cols == ((flags & CV_GEMM_B_T) == 0 ? B.cols : B.rows)),
1090 D.type() == A.type() );
1091
1092 gemm( A, B, alpha, C, beta, D, flags );
1093 }
1094
1095
// C-API wrapper around cv::transform. When a shift vector is supplied it is
// appended as an extra column, so a single (rows x cols+1) matrix performs
// the affine transform m*x + v.
CV_IMPL void
cvTransform( const CvArr* srcarr, CvArr* dstarr,
             const CvMat* transmat, const CvMat* shiftvec )
{
    cv::Mat m = cv::cvarrToMat(transmat), src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    if( shiftvec )
    {
        // Build the augmented matrix [m | v] in the transform's own type.
        cv::Mat v = cv::cvarrToMat(shiftvec).reshape(1,m.rows),
            _m(m.rows, m.cols + 1, m.type()), m1 = _m.colRange(0,m.cols), v1 = _m.col(m.cols);
        m.convertTo(m1, m1.type());
        v.convertTo(v1, v1.type());
        m = _m;
    }

    // One output channel per transform-matrix row.
    CV_Assert_N( dst.depth() == src.depth(), dst.channels() == m.rows );
    cv::transform( src, dst, m );
}
1114
1115
1116 CV_IMPL void
cvPerspectiveTransform(const CvArr * srcarr,CvArr * dstarr,const CvMat * mat)1117 cvPerspectiveTransform( const CvArr* srcarr, CvArr* dstarr, const CvMat* mat )
1118 {
1119 cv::Mat m = cv::cvarrToMat(mat), src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1120
1121 CV_Assert_N( dst.type() == src.type(), dst.channels() == m.rows-1 );
1122 cv::perspectiveTransform( src, dst, m );
1123 }
1124
1125
cvScaleAdd(const CvArr * srcarr1,CvScalar scale,const CvArr * srcarr2,CvArr * dstarr)1126 CV_IMPL void cvScaleAdd( const CvArr* srcarr1, CvScalar scale,
1127 const CvArr* srcarr2, CvArr* dstarr )
1128 {
1129 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
1130
1131 CV_Assert_N( src1.size == dst.size, src1.type() == dst.type() );
1132 cv::scaleAdd( src1, scale.val[0], cv::cvarrToMat(srcarr2), dst );
1133 }
1134
1135
// C-API wrapper around cv::calcCovarMatrix. Supports both the packed layout
// (all samples in vecarr[0] as rows/columns, per CV_COVAR_ROWS/COLS) and the
// legacy one-array-per-sample layout.
CV_IMPL void
cvCalcCovarMatrix( const CvArr** vecarr, int count,
                   CvArr* covarr, CvArr* avgarr, int flags )
{
    cv::Mat cov0 = cv::cvarrToMat(covarr), cov = cov0, mean0, mean;
    CV_Assert_N( vecarr != 0, count >= 1 );

    if( avgarr )
        mean = mean0 = cv::cvarrToMat(avgarr);

    if( (flags & CV_COVAR_COLS) != 0 || (flags & CV_COVAR_ROWS) != 0 )
    {
        // All samples are packed into a single matrix.
        cv::Mat data = cv::cvarrToMat(vecarr[0]);
        cv::calcCovarMatrix( data, cov, mean, flags, cov.type() );
    }
    else
    {
        // One input array per sample.
        std::vector<cv::Mat> data(count);
        for( int i = 0; i < count; i++ )
            data[i] = cv::cvarrToMat(vecarr[i]);
        cv::calcCovarMatrix( &data[0], count, cov, mean, flags, cov.type() );
    }

    // If calcCovarMatrix reallocated an output (different data pointer),
    // convert the result back into the caller-provided array.
    if( mean.data != mean0.data && mean0.data )
        mean.convertTo(mean0, mean0.type());

    if( cov.data != cov0.data )
        cov.convertTo(cov0, cov0.type());
}
1166
1167
1168 CV_IMPL double
cvMahalanobis(const CvArr * srcAarr,const CvArr * srcBarr,const CvArr * matarr)1169 cvMahalanobis( const CvArr* srcAarr, const CvArr* srcBarr, const CvArr* matarr )
1170 {
1171 return cv::Mahalanobis(cv::cvarrToMat(srcAarr),
1172 cv::cvarrToMat(srcBarr), cv::cvarrToMat(matarr));
1173 }
1174
1175 CV_IMPL void
cvMulTransposed(const CvArr * srcarr,CvArr * dstarr,int order,const CvArr * deltaarr,double scale)1176 cvMulTransposed( const CvArr* srcarr, CvArr* dstarr,
1177 int order, const CvArr* deltaarr, double scale )
1178 {
1179 cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0, delta;
1180 if( deltaarr )
1181 delta = cv::cvarrToMat(deltaarr);
1182 cv::mulTransposed( src, dst, order != 0, delta, scale, dst.type());
1183 if( dst.data != dst0.data )
1184 dst.convertTo(dst0, dst0.type());
1185 }
1186
cvDotProduct(const CvArr * srcAarr,const CvArr * srcBarr)1187 CV_IMPL double cvDotProduct( const CvArr* srcAarr, const CvArr* srcBarr )
1188 {
1189 return cv::cvarrToMat(srcAarr).dot(cv::cvarrToMat(srcBarr));
1190 }
1191
1192
// C-API wrapper around cv::PCA: computes mean, eigenvalues and eigenvectors
// of the data, then copies the results back into the caller-provided arrays,
// transposing/converting where the layouts differ.
CV_IMPL void
cvCalcPCA( const CvArr* data_arr, CvArr* avg_arr, CvArr* eigenvals, CvArr* eigenvects, int flags )
{
    cv::Mat data = cv::cvarrToMat(data_arr), mean0 = cv::cvarrToMat(avg_arr);
    cv::Mat evals0 = cv::cvarrToMat(eigenvals), evects0 = cv::cvarrToMat(eigenvects);
    cv::Mat mean = mean0, evals = evals0, evects = evects0;

    cv::PCA pca;
    pca.mean = mean;
    pca.eigenvalues = evals;
    pca.eigenvectors = evects;

    // Use the precomputed mean only when CV_PCA_USE_AVG is set; the number
    // of requested components is the length of the eigenvalue array.
    pca(data, (flags & CV_PCA_USE_AVG) ? mean : cv::Mat(),
        flags, !evals.empty() ? evals.rows + evals.cols - 1 : 0);

    // Copy the mean back, transposing if PCA produced the other orientation.
    if( pca.mean.size() == mean.size() )
        pca.mean.convertTo( mean, mean.type() );
    else
    {
        cv::Mat temp; pca.mean.convertTo( temp, mean.type() );
        transpose( temp, mean );
    }

    evals = pca.eigenvalues;
    evects = pca.eigenvectors;
    int ecount0 = evals0.cols + evals0.rows - 1;  // components requested by the caller
    int ecount = evals.cols + evals.rows - 1;     // components actually computed

    CV_Assert_N( (evals0.cols == 1 || evals0.rows == 1),
        ecount0 <= ecount,
        evects0.cols == evects.cols,
        evects0.rows == ecount0 );

    // Copy back the first ecount0 eigenvalues, matching the caller's
    // row/column orientation.
    cv::Mat temp = evals0;
    if( evals.rows == 1 )
        evals.colRange(0, ecount0).convertTo(temp, evals0.type());
    else
        evals.rowRange(0, ecount0).convertTo(temp, evals0.type());
    if( temp.data != evals0.data )
        transpose(temp, evals0);
    evects.rowRange(0, ecount0).convertTo( evects0, evects0.type() );

    // otherwise some datatype's or size's were incorrect, so the output arrays have been reallocated
    CV_Assert( mean0.data == mean.data );
}
1238
1239
// C-API wrapper: project data onto the first n eigenvectors
// (cv::PCA::project). n is taken from the destination's size.
CV_IMPL void
cvProjectPCA( const CvArr* data_arr, const CvArr* avg_arr,
              const CvArr* eigenvects, CvArr* result_arr )
{
    cv::Mat data = cv::cvarrToMat(data_arr), mean = cv::cvarrToMat(avg_arr);
    cv::Mat evects = cv::cvarrToMat(eigenvects), dst0 = cv::cvarrToMat(result_arr), dst = dst0;

    cv::PCA pca;
    pca.mean = mean;
    int n;
    // A single-row mean means samples are rows; otherwise samples are columns.
    if( mean.rows == 1 )
    {
        CV_Assert_N(dst.cols <= evects.rows, dst.rows == data.rows);
        n = dst.cols;
    }
    else
    {
        CV_Assert_N(dst.rows <= evects.rows, dst.cols == data.cols);
        n = dst.rows;
    }
    pca.eigenvectors = evects.rowRange(0, n);

    cv::Mat result = pca.project(data);
    // Flatten if the projection's orientation differs from the destination.
    if( result.cols != dst.cols )
        result = result.reshape(1, 1);
    result.convertTo(dst, dst.type());

    // dst must not have been reallocated by convertTo.
    CV_Assert(dst0.data == dst.data);
}
1269
1270
// C-API wrapper: reconstruct data from its PCA projection
// (cv::PCA::backProject). n is taken from the projection's size.
CV_IMPL void
cvBackProjectPCA( const CvArr* proj_arr, const CvArr* avg_arr,
                  const CvArr* eigenvects, CvArr* result_arr )
{
    cv::Mat data = cv::cvarrToMat(proj_arr), mean = cv::cvarrToMat(avg_arr);
    cv::Mat evects = cv::cvarrToMat(eigenvects), dst0 = cv::cvarrToMat(result_arr), dst = dst0;

    cv::PCA pca;
    pca.mean = mean;
    int n;
    // A single-row mean means samples are rows; otherwise samples are columns.
    if( mean.rows == 1 )
    {
        CV_Assert_N(data.cols <= evects.rows, dst.rows == data.rows);
        n = data.cols;
    }
    else
    {
        CV_Assert_N(data.rows <= evects.rows, dst.cols == data.cols);
        n = data.rows;
    }
    pca.eigenvectors = evects.rowRange(0, n);

    cv::Mat result = pca.backProject(data);
    result.convertTo(dst, dst.type());

    // dst must not have been reallocated by convertTo.
    CV_Assert(dst0.data == dst.data);
}
1298
1299 #endif // OPENCV_EXCLUDE_C_API
1300
1301 /* End of file. */
1302