1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44 #include <limits>
45
46 #ifdef HAVE_EIGEN
47 # if defined(_MSC_VER)
48 # pragma warning(push)
49 # pragma warning(disable:4701) // potentially uninitialized local variable
50 # pragma warning(disable:4702) // unreachable code
51 # pragma warning(disable:4714) // const marked as __forceinline not inlined
52 # endif
53 # include <Eigen/Core>
54 # include <Eigen/Eigenvalues>
55 # if defined(_MSC_VER)
56 # pragma warning(pop)
57 # endif
58 # include "opencv2/core/eigen.hpp"
59 #endif
60
61 #if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700
62 #pragma float_control(precise, on)
63 #endif
64
65 namespace cv
66 {
67
LU(float * A,size_t astep,int m,float * b,size_t bstep,int n)68 int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
69 {
70 CV_INSTRUMENT_REGION();
71
72 return hal::LU32f(A, astep, m, b, bstep, n);
73 }
74
LU(double * A,size_t astep,int m,double * b,size_t bstep,int n)75 int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
76 {
77 CV_INSTRUMENT_REGION();
78
79 return hal::LU64f(A, astep, m, b, bstep, n);
80 }
81
Cholesky(float * A,size_t astep,int m,float * b,size_t bstep,int n)82 bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
83 {
84 CV_INSTRUMENT_REGION();
85
86 return hal::Cholesky32f(A, astep, m, b, bstep, n);
87 }
88
Cholesky(double * A,size_t astep,int m,double * b,size_t bstep,int n)89 bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
90 {
91 CV_INSTRUMENT_REGION();
92
93 return hal::Cholesky64f(A, astep, m, b, bstep, n);
94 }
95
hypot(_Tp a,_Tp b)96 template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
97 {
98 a = std::abs(a);
99 b = std::abs(b);
100 if( a > b )
101 {
102 b /= a;
103 return a*std::sqrt(1 + b*b);
104 }
105 if( b > 0 )
106 {
107 a /= b;
108 return b*std::sqrt(1 + a*a);
109 }
110 return 0;
111 }
112
113
114 template<typename _Tp> bool
JacobiImpl_(_Tp * A,size_t astep,_Tp * W,_Tp * V,size_t vstep,int n,uchar * buf)115 JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
116 {
117 const _Tp eps = std::numeric_limits<_Tp>::epsilon();
118 int i, j, k, m;
119
120 astep /= sizeof(A[0]);
121 if( V )
122 {
123 vstep /= sizeof(V[0]);
124 for( i = 0; i < n; i++ )
125 {
126 for( j = 0; j < n; j++ )
127 V[i*vstep + j] = (_Tp)0;
128 V[i*vstep + i] = (_Tp)1;
129 }
130 }
131
132 int iters, maxIters = n*n*30;
133
134 int* indR = (int*)alignPtr(buf, sizeof(int));
135 int* indC = indR + n;
136 _Tp mv = (_Tp)0;
137
138 for( k = 0; k < n; k++ )
139 {
140 W[k] = A[(astep + 1)*k];
141 if( k < n - 1 )
142 {
143 for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
144 {
145 _Tp val = std::abs(A[astep*k+i]);
146 if( mv < val )
147 mv = val, m = i;
148 }
149 indR[k] = m;
150 }
151 if( k > 0 )
152 {
153 for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
154 {
155 _Tp val = std::abs(A[astep*i+k]);
156 if( mv < val )
157 mv = val, m = i;
158 }
159 indC[k] = m;
160 }
161 }
162
163 if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
164 {
165 // find index (k,l) of pivot p
166 for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
167 {
168 _Tp val = std::abs(A[astep*i + indR[i]]);
169 if( mv < val )
170 mv = val, k = i;
171 }
172 int l = indR[k];
173 for( i = 1; i < n; i++ )
174 {
175 _Tp val = std::abs(A[astep*indC[i] + i]);
176 if( mv < val )
177 mv = val, k = indC[i], l = i;
178 }
179
180 _Tp p = A[astep*k + l];
181 if( std::abs(p) <= eps )
182 break;
183 _Tp y = (_Tp)((W[l] - W[k])*0.5);
184 _Tp t = std::abs(y) + hypot(p, y);
185 _Tp s = hypot(p, t);
186 _Tp c = t/s;
187 s = p/s; t = (p/t)*p;
188 if( y < 0 )
189 s = -s, t = -t;
190 A[astep*k + l] = 0;
191
192 W[k] -= t;
193 W[l] += t;
194
195 _Tp a0, b0;
196
197 #undef rotate
198 #define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c
199
200 // rotate rows and columns k and l
201 for( i = 0; i < k; i++ )
202 rotate(A[astep*i+k], A[astep*i+l]);
203 for( i = k+1; i < l; i++ )
204 rotate(A[astep*k+i], A[astep*i+l]);
205 for( i = l+1; i < n; i++ )
206 rotate(A[astep*k+i], A[astep*l+i]);
207
208 // rotate eigenvectors
209 if( V )
210 for( i = 0; i < n; i++ )
211 rotate(V[vstep*k+i], V[vstep*l+i]);
212
213 #undef rotate
214
215 for( j = 0; j < 2; j++ )
216 {
217 int idx = j == 0 ? k : l;
218 if( idx < n - 1 )
219 {
220 for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
221 {
222 _Tp val = std::abs(A[astep*idx+i]);
223 if( mv < val )
224 mv = val, m = i;
225 }
226 indR[idx] = m;
227 }
228 if( idx > 0 )
229 {
230 for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
231 {
232 _Tp val = std::abs(A[astep*i+idx]);
233 if( mv < val )
234 mv = val, m = i;
235 }
236 indC[idx] = m;
237 }
238 }
239 }
240
241 // sort eigenvalues & eigenvectors
242 for( k = 0; k < n-1; k++ )
243 {
244 m = k;
245 for( i = k+1; i < n; i++ )
246 {
247 if( W[m] < W[i] )
248 m = i;
249 }
250 if( k != m )
251 {
252 std::swap(W[m], W[k]);
253 if( V )
254 for( i = 0; i < n; i++ )
255 std::swap(V[vstep*m + i], V[vstep*k + i]);
256 }
257 }
258
259 return true;
260 }
261
Jacobi(float * S,size_t sstep,float * e,float * E,size_t estep,int n,uchar * buf)262 static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
263 {
264 return JacobiImpl_(S, sstep, e, E, estep, n, buf);
265 }
266
Jacobi(double * S,size_t sstep,double * e,double * E,size_t estep,int n,uchar * buf)267 static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
268 {
269 return JacobiImpl_(S, sstep, e, E, estep, n, buf);
270 }
271
272
273 template<typename T> struct VBLAS
274 {
dotcv::VBLAS275 int dot(const T*, const T*, int, T*) const { return 0; }
givenscv::VBLAS276 int givens(T*, T*, int, T, T) const { return 0; }
givensxcv::VBLAS277 int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
278 };
279
280 #if CV_SIMD
dot(const float * a,const float * b,int n,float * result) const281 template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
282 {
283 if( n < 2*v_float32::nlanes )
284 return 0;
285 int k = 0;
286 v_float32 s0 = vx_setzero_f32();
287 for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes )
288 {
289 v_float32 a0 = vx_load(a + k);
290 v_float32 b0 = vx_load(b + k);
291
292 s0 += a0 * b0;
293 }
294 *result = v_reduce_sum(s0);
295 vx_cleanup();
296 return k;
297 }
298
299
givens(float * a,float * b,int n,float c,float s) const300 template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
301 {
302 if( n < v_float32::nlanes)
303 return 0;
304 int k = 0;
305 v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s);
306 for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes )
307 {
308 v_float32 a0 = vx_load(a + k);
309 v_float32 b0 = vx_load(b + k);
310 v_float32 t0 = (a0 * c4) + (b0 * s4);
311 v_float32 t1 = (b0 * c4) - (a0 * s4);
312 v_store(a + k, t0);
313 v_store(b + k, t1);
314 }
315 vx_cleanup();
316 return k;
317 }
318
319
givensx(float * a,float * b,int n,float c,float s,float * anorm,float * bnorm) const320 template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
321 float* anorm, float* bnorm) const
322 {
323 if( n < v_float32::nlanes)
324 return 0;
325 int k = 0;
326 v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s);
327 v_float32 sa = vx_setzero_f32(), sb = vx_setzero_f32();
328 for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes )
329 {
330 v_float32 a0 = vx_load(a + k);
331 v_float32 b0 = vx_load(b + k);
332 v_float32 t0 = (a0 * c4) + (b0 * s4);
333 v_float32 t1 = (b0 * c4) - (a0 * s4);
334 v_store(a + k, t0);
335 v_store(b + k, t1);
336 sa += t0 + t0;
337 sb += t1 + t1;
338 }
339 *anorm = v_reduce_sum(sa);
340 *bnorm = v_reduce_sum(sb);
341 vx_cleanup();
342 return k;
343 }
344
345 #if CV_SIMD_64F
dot(const double * a,const double * b,int n,double * result) const346 template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
347 {
348 if( n < 2*v_float64::nlanes )
349 return 0;
350 int k = 0;
351 v_float64 s0 = vx_setzero_f64();
352 for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes )
353 {
354 v_float64 a0 = vx_load(a + k);
355 v_float64 b0 = vx_load(b + k);
356
357 s0 += a0 * b0;
358 }
359 double sbuf[2];
360 v_store(sbuf, s0);
361 *result = sbuf[0] + sbuf[1];
362 vx_cleanup();
363 return k;
364 }
365
366
givens(double * a,double * b,int n,double c,double s) const367 template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
368 {
369 int k = 0;
370 v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s);
371 for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes )
372 {
373 v_float64 a0 = vx_load(a + k);
374 v_float64 b0 = vx_load(b + k);
375 v_float64 t0 = (a0 * c2) + (b0 * s2);
376 v_float64 t1 = (b0 * c2) - (a0 * s2);
377 v_store(a + k, t0);
378 v_store(b + k, t1);
379 }
380 vx_cleanup();
381 return k;
382 }
383
384
givensx(double * a,double * b,int n,double c,double s,double * anorm,double * bnorm) const385 template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s,
386 double* anorm, double* bnorm) const
387 {
388 int k = 0;
389 v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s);
390 v_float64 sa = vx_setzero_f64(), sb = vx_setzero_f64();
391 for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes )
392 {
393 v_float64 a0 = vx_load(a + k);
394 v_float64 b0 = vx_load(b + k);
395 v_float64 t0 = (a0 * c2) + (b0 * s2);
396 v_float64 t1 = (b0 * c2) - (a0 * s2);
397 v_store(a + k, t0);
398 v_store(b + k, t1);
399 sa += t0 * t0;
400 sb += t1 * t1;
401 }
402 double abuf[2], bbuf[2];
403 v_store(abuf, sa);
404 v_store(bbuf, sb);
405 *anorm = abuf[0] + abuf[1];
406 *bnorm = bbuf[0] + bbuf[1];
407 return k;
408 }
409 #endif //CV_SIMD_64F
410 #endif //CV_SIMD
411
412 template<typename _Tp> void
JacobiSVDImpl_(_Tp * At,size_t astep,_Tp * _W,_Tp * Vt,size_t vstep,int m,int n,int n1,double minval,_Tp eps)413 JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
414 int m, int n, int n1, double minval, _Tp eps)
415 {
416 VBLAS<_Tp> vblas;
417 AutoBuffer<double> Wbuf(n);
418 double* W = Wbuf.data();
419 int i, j, k, iter, max_iter = std::max(m, 30);
420 _Tp c, s;
421 double sd;
422 astep /= sizeof(At[0]);
423 vstep /= sizeof(Vt[0]);
424
425 for( i = 0; i < n; i++ )
426 {
427 for( k = 0, sd = 0; k < m; k++ )
428 {
429 _Tp t = At[i*astep + k];
430 sd += (double)t*t;
431 }
432 W[i] = sd;
433
434 if( Vt )
435 {
436 for( k = 0; k < n; k++ )
437 Vt[i*vstep + k] = 0;
438 Vt[i*vstep + i] = 1;
439 }
440 }
441
442 for( iter = 0; iter < max_iter; iter++ )
443 {
444 bool changed = false;
445
446 for( i = 0; i < n-1; i++ )
447 for( j = i+1; j < n; j++ )
448 {
449 _Tp *Ai = At + i*astep, *Aj = At + j*astep;
450 double a = W[i], p = 0, b = W[j];
451
452 for( k = 0; k < m; k++ )
453 p += (double)Ai[k]*Aj[k];
454
455 if( std::abs(p) <= eps*std::sqrt((double)a*b) )
456 continue;
457
458 p *= 2;
459 double beta = a - b, gamma = hypot((double)p, beta);
460 if( beta < 0 )
461 {
462 double delta = (gamma - beta)*0.5;
463 s = (_Tp)std::sqrt(delta/gamma);
464 c = (_Tp)(p/(gamma*s*2));
465 }
466 else
467 {
468 c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
469 s = (_Tp)(p/(gamma*c*2));
470 }
471
472 a = b = 0;
473 for( k = 0; k < m; k++ )
474 {
475 _Tp t0 = c*Ai[k] + s*Aj[k];
476 _Tp t1 = -s*Ai[k] + c*Aj[k];
477 Ai[k] = t0; Aj[k] = t1;
478
479 a += (double)t0*t0; b += (double)t1*t1;
480 }
481 W[i] = a; W[j] = b;
482
483 changed = true;
484
485 if( Vt )
486 {
487 _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
488 k = vblas.givens(Vi, Vj, n, c, s);
489
490 for( ; k < n; k++ )
491 {
492 _Tp t0 = c*Vi[k] + s*Vj[k];
493 _Tp t1 = -s*Vi[k] + c*Vj[k];
494 Vi[k] = t0; Vj[k] = t1;
495 }
496 }
497 }
498 if( !changed )
499 break;
500 }
501
502 for( i = 0; i < n; i++ )
503 {
504 for( k = 0, sd = 0; k < m; k++ )
505 {
506 _Tp t = At[i*astep + k];
507 sd += (double)t*t;
508 }
509 W[i] = std::sqrt(sd);
510 }
511
512 for( i = 0; i < n-1; i++ )
513 {
514 j = i;
515 for( k = i+1; k < n; k++ )
516 {
517 if( W[j] < W[k] )
518 j = k;
519 }
520 if( i != j )
521 {
522 std::swap(W[i], W[j]);
523 if( Vt )
524 {
525 for( k = 0; k < m; k++ )
526 std::swap(At[i*astep + k], At[j*astep + k]);
527
528 for( k = 0; k < n; k++ )
529 std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
530 }
531 }
532 }
533
534 for( i = 0; i < n; i++ )
535 _W[i] = (_Tp)W[i];
536
537 if( !Vt )
538 return;
539
540 RNG rng(0x12345678);
541 for( i = 0; i < n1; i++ )
542 {
543 sd = i < n ? W[i] : 0;
544
545 for( int ii = 0; ii < 100 && sd <= minval; ii++ )
546 {
547 // if we got a zero singular value, then in order to get the corresponding left singular vector
548 // we generate a random vector, project it to the previously computed left singular vectors,
549 // subtract the projection and normalize the difference.
550 const _Tp val0 = (_Tp)(1./m);
551 for( k = 0; k < m; k++ )
552 {
553 _Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
554 At[i*astep + k] = val;
555 }
556 for( iter = 0; iter < 2; iter++ )
557 {
558 for( j = 0; j < i; j++ )
559 {
560 sd = 0;
561 for( k = 0; k < m; k++ )
562 sd += At[i*astep + k]*At[j*astep + k];
563 _Tp asum = 0;
564 for( k = 0; k < m; k++ )
565 {
566 _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
567 At[i*astep + k] = t;
568 asum += std::abs(t);
569 }
570 asum = asum > eps*100 ? 1/asum : 0;
571 for( k = 0; k < m; k++ )
572 At[i*astep + k] *= asum;
573 }
574 }
575 sd = 0;
576 for( k = 0; k < m; k++ )
577 {
578 _Tp t = At[i*astep + k];
579 sd += (double)t*t;
580 }
581 sd = std::sqrt(sd);
582 }
583
584 s = (_Tp)(sd > minval ? 1/sd : 0.);
585 for( k = 0; k < m; k++ )
586 At[i*astep + k] *= s;
587 }
588 }
589
590
JacobiSVD(float * At,size_t astep,float * W,float * Vt,size_t vstep,int m,int n,int n1=-1)591 static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
592 {
593 hal::SVD32f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
594 }
595
JacobiSVD(double * At,size_t astep,double * W,double * Vt,size_t vstep,int m,int n,int n1=-1)596 static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
597 {
598 hal::SVD64f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
599 }
600
601 template <typename fptype> static inline int
decodeSVDParameters(const fptype * U,const fptype * Vt,int m,int n,int n1)602 decodeSVDParameters(const fptype* U, const fptype* Vt, int m, int n, int n1)
603 {
604 int halSVDFlag = 0;
605 if(Vt == NULL)
606 halSVDFlag = CV_HAL_SVD_NO_UV;
607 else if(n1 <= 0 || n1 == n)
608 {
609 halSVDFlag = CV_HAL_SVD_SHORT_UV;
610 if(U == NULL)
611 halSVDFlag |= CV_HAL_SVD_MODIFY_A;
612 }
613 else if(n1 == m)
614 {
615 halSVDFlag = CV_HAL_SVD_FULL_UV;
616 if(U == NULL)
617 halSVDFlag |= CV_HAL_SVD_MODIFY_A;
618 }
619 return halSVDFlag;
620 }
621
SVD32f(float * At,size_t astep,float * W,float * U,size_t ustep,float * Vt,size_t vstep,int m,int n,int n1)622 void hal::SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int n1)
623 {
624 CALL_HAL(SVD32f, cv_hal_SVD32f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
625 JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
626 }
627
SVD64f(double * At,size_t astep,double * W,double * U,size_t ustep,double * Vt,size_t vstep,int m,int n,int n1)628 void hal::SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int n1)
629 {
630 CALL_HAL(SVD64f, cv_hal_SVD64f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
631 JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
632 }
633
634 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
635 template<typename T1, typename T2, typename T3> static void
MatrAXPY(int m,int n,const T1 * x,int dx,const T2 * a,int inca,T3 * y,int dy)636 MatrAXPY( int m, int n, const T1* x, int dx,
637 const T2* a, int inca, T3* y, int dy )
638 {
639 int i;
640 for( i = 0; i < m; i++, x += dx, y += dy )
641 {
642 T2 s = a[i*inca];
643 int j = 0;
644 #if CV_ENABLE_UNROLLED
645 for(; j <= n - 4; j += 4 )
646 {
647 T3 t0 = (T3)(y[j] + s*x[j]);
648 T3 t1 = (T3)(y[j+1] + s*x[j+1]);
649 y[j] = t0;
650 y[j+1] = t1;
651 t0 = (T3)(y[j+2] + s*x[j+2]);
652 t1 = (T3)(y[j+3] + s*x[j+3]);
653 y[j+2] = t0;
654 y[j+3] = t1;
655 }
656 #endif
657 for( ; j < n; j++ )
658 y[j] = (T3)(y[j] + s*x[j]);
659 }
660 }
661
662 template<typename T> static void
SVBkSbImpl_(int m,int n,const T * w,int incw,const T * u,int ldu,bool uT,const T * v,int ldv,bool vT,const T * b,int ldb,int nb,T * x,int ldx,double * buffer,T eps)663 SVBkSbImpl_( int m, int n, const T* w, int incw,
664 const T* u, int ldu, bool uT,
665 const T* v, int ldv, bool vT,
666 const T* b, int ldb, int nb,
667 T* x, int ldx, double* buffer, T eps )
668 {
669 double threshold = 0;
670 int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
671 int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
672 int i, j, nm = std::min(m, n);
673
674 if( !b )
675 nb = m;
676
677 for( i = 0; i < n; i++ )
678 for( j = 0; j < nb; j++ )
679 x[i*ldx + j] = 0;
680
681 for( i = 0; i < nm; i++ )
682 threshold += w[i*incw];
683 threshold *= eps;
684
685 // v * inv(w) * uT * b
686 for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
687 {
688 double wi = w[i*incw];
689 if( (double)std::abs(wi) <= threshold )
690 continue;
691 wi = 1/wi;
692
693 if( nb == 1 )
694 {
695 double s = 0;
696 if( b )
697 for( j = 0; j < m; j++ )
698 s += u[j*udelta1]*b[j*ldb];
699 else
700 s = u[0];
701 s *= wi;
702
703 for( j = 0; j < n; j++ )
704 x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
705 }
706 else
707 {
708 if( b )
709 {
710 for( j = 0; j < nb; j++ )
711 buffer[j] = 0;
712 MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
713 for( j = 0; j < nb; j++ )
714 buffer[j] *= wi;
715 }
716 else
717 {
718 for( j = 0; j < nb; j++ )
719 buffer[j] = u[j*udelta1]*wi;
720 }
721 MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
722 }
723 }
724 }
725
726 static void
SVBkSb(int m,int n,const float * w,size_t wstep,const float * u,size_t ustep,bool uT,const float * v,size_t vstep,bool vT,const float * b,size_t bstep,int nb,float * x,size_t xstep,uchar * buffer)727 SVBkSb( int m, int n, const float* w, size_t wstep,
728 const float* u, size_t ustep, bool uT,
729 const float* v, size_t vstep, bool vT,
730 const float* b, size_t bstep, int nb,
731 float* x, size_t xstep, uchar* buffer )
732 {
733 SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
734 u, (int)(ustep/sizeof(u[0])), uT,
735 v, (int)(vstep/sizeof(v[0])), vT,
736 b, (int)(bstep/sizeof(b[0])), nb,
737 x, (int)(xstep/sizeof(x[0])),
738 (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
739 }
740
741 static void
SVBkSb(int m,int n,const double * w,size_t wstep,const double * u,size_t ustep,bool uT,const double * v,size_t vstep,bool vT,const double * b,size_t bstep,int nb,double * x,size_t xstep,uchar * buffer)742 SVBkSb( int m, int n, const double* w, size_t wstep,
743 const double* u, size_t ustep, bool uT,
744 const double* v, size_t vstep, bool vT,
745 const double* b, size_t bstep, int nb,
746 double* x, size_t xstep, uchar* buffer )
747 {
748 SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
749 u, (int)(ustep/sizeof(u[0])), uT,
750 v, (int)(vstep/sizeof(v[0])), vT,
751 b, (int)(bstep/sizeof(b[0])), nb,
752 x, (int)(xstep/sizeof(x[0])),
753 (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
754 }
755
756 /****************************************************************************************\
757 * Determinant of the matrix *
758 \****************************************************************************************/
759
760 #define det2(m) ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
761 #define det3(m) (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) - \
762 m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \
763 m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
764
determinant(InputArray _mat)765 double determinant( InputArray _mat )
766 {
767 CV_INSTRUMENT_REGION();
768
769 Mat mat = _mat.getMat();
770 double result = 0;
771 int type = mat.type(), rows = mat.rows;
772 size_t step = mat.step;
773 const uchar* m = mat.ptr();
774
775 CV_Assert( !mat.empty() );
776 CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));
777
778 #define Mf(y, x) ((float*)(m + y*step))[x]
779 #define Md(y, x) ((double*)(m + y*step))[x]
780
781 if( type == CV_32F )
782 {
783 if( rows == 2 )
784 result = det2(Mf);
785 else if( rows == 3 )
786 result = det3(Mf);
787 else if( rows == 1 )
788 result = Mf(0,0);
789 else
790 {
791 size_t bufSize = rows*rows*sizeof(float);
792 AutoBuffer<uchar> buffer(bufSize);
793 Mat a(rows, rows, CV_32F, buffer.data());
794 mat.copyTo(a);
795
796 result = hal::LU32f(a.ptr<float>(), a.step, rows, 0, 0, 0);
797 if( result )
798 {
799 for( int i = 0; i < rows; i++ )
800 result *= a.at<float>(i,i);
801 }
802 }
803 }
804 else
805 {
806 if( rows == 2 )
807 result = det2(Md);
808 else if( rows == 3 )
809 result = det3(Md);
810 else if( rows == 1 )
811 result = Md(0,0);
812 else
813 {
814 size_t bufSize = rows*rows*sizeof(double);
815 AutoBuffer<uchar> buffer(bufSize);
816 Mat a(rows, rows, CV_64F, buffer.data());
817 mat.copyTo(a);
818
819 result = hal::LU64f(a.ptr<double>(), a.step, rows, 0, 0, 0);
820 if( result )
821 {
822 for( int i = 0; i < rows; i++ )
823 result *= a.at<double>(i,i);
824 }
825 }
826 }
827
828 #undef Mf
829 #undef Md
830
831 return result;
832 }
833
834 /****************************************************************************************\
835 * Inverse (or pseudo-inverse) of a matrix *
836 \****************************************************************************************/
837
838 #define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
839 #define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
840 #define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
841 #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
842
invert(InputArray _src,OutputArray _dst,int method)843 double invert( InputArray _src, OutputArray _dst, int method )
844 {
845 CV_INSTRUMENT_REGION();
846
847 bool result = false;
848 Mat src = _src.getMat();
849 int type = src.type();
850
851 CV_Assert(type == CV_32F || type == CV_64F);
852
853 size_t esz = CV_ELEM_SIZE(type);
854 int m = src.rows, n = src.cols;
855
856 if( method == DECOMP_SVD )
857 {
858 int nm = std::min(m, n);
859
860 AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
861 uchar* buf = alignPtr((uchar*)_buf.data(), (int)esz);
862 Mat u(m, nm, type, buf);
863 Mat w(nm, 1, type, u.ptr() + m*nm*esz);
864 Mat vt(nm, n, type, w.ptr() + nm*esz);
865
866 SVD::compute(src, w, u, vt);
867 SVD::backSubst(w, u, vt, Mat(), _dst);
868 return type == CV_32F ?
869 (w.ptr<float>()[0] >= FLT_EPSILON ?
870 w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
871 (w.ptr<double>()[0] >= DBL_EPSILON ?
872 w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
873 }
874
875 CV_Assert( m == n );
876
877 if( method == DECOMP_EIG )
878 {
879 AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
880 uchar* buf = alignPtr((uchar*)_buf.data(), (int)esz);
881 Mat u(n, n, type, buf);
882 Mat w(n, 1, type, u.ptr() + n*n*esz);
883 Mat vt(n, n, type, w.ptr() + n*esz);
884
885 eigen(src, w, vt);
886 transpose(vt, u);
887 SVD::backSubst(w, u, vt, Mat(), _dst);
888 return type == CV_32F ?
889 (w.ptr<float>()[0] >= FLT_EPSILON ?
890 w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
891 (w.ptr<double>()[0] >= DBL_EPSILON ?
892 w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
893 }
894
895 CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
896
897 _dst.create( n, n, type );
898 Mat dst = _dst.getMat();
899
900 if( n <= 3 )
901 {
902 const uchar* srcdata = src.ptr();
903 uchar* dstdata = dst.ptr();
904 size_t srcstep = src.step;
905 size_t dststep = dst.step;
906
907 if( n == 2 )
908 {
909 if( type == CV_32FC1 )
910 {
911 double d = det2(Sf);
912 if( d != 0. )
913 {
914 result = true;
915 d = 1./d;
916 #if CV_SIMD128
917 const float d_32f = (float)d;
918 const v_float32x4 d_vec(d_32f, -d_32f, -d_32f, d_32f);
919 v_float32x4 s0 = v_load_halves((const float*)srcdata, (const float*)(srcdata + srcstep)) * d_vec;//0123//3120
920 s0 = v_extract<3>(s0, v_combine_low(v_rotate_right<1>(s0), s0));
921 v_store_low((float*)dstdata, s0);
922 v_store_high((float*)(dstdata + dststep), s0);
923 #else
924 double t0, t1;
925 t0 = Sf(0,0)*d;
926 t1 = Sf(1,1)*d;
927 Df(1,1) = (float)t0;
928 Df(0,0) = (float)t1;
929 t0 = -Sf(0,1)*d;
930 t1 = -Sf(1,0)*d;
931 Df(0,1) = (float)t0;
932 Df(1,0) = (float)t1;
933 #endif
934 }
935 }
936 else
937 {
938 double d = det2(Sd);
939 if( d != 0. )
940 {
941 result = true;
942 d = 1./d;
943 #if CV_SIMD128_64F
944 v_float64x2 det = v_setall_f64(d);
945 v_float64x2 s0 = v_load((const double*)srcdata) * det;
946 v_float64x2 s1 = v_load((const double*)(srcdata+srcstep)) * det;
947 v_float64x2 sm = v_extract<1>(s1, s0);//30
948 v_float64x2 ss = v_setall<double>(0) - v_extract<1>(s0, s1);//12
949 v_store((double*)dstdata, v_combine_low(sm, ss));//31
950 v_store((double*)(dstdata + dststep), v_combine_high(ss, sm));//20
951 #else
952 double t0, t1;
953 t0 = Sd(0,0)*d;
954 t1 = Sd(1,1)*d;
955 Dd(1,1) = t0;
956 Dd(0,0) = t1;
957 t0 = -Sd(0,1)*d;
958 t1 = -Sd(1,0)*d;
959 Dd(0,1) = t0;
960 Dd(1,0) = t1;
961 #endif
962 }
963 }
964 }
965 else if( n == 3 )
966 {
967 if( type == CV_32FC1 )
968 {
969 double d = det3(Sf);
970
971 if( d != 0. )
972 {
973 double t[12];
974
975 result = true;
976 d = 1./d;
977 t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
978 t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
979 t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);
980
981 t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
982 t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
983 t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);
984
985 t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
986 t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
987 t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);
988
989 Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2];
990 Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5];
991 Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8];
992 }
993 }
994 else
995 {
996 double d = det3(Sd);
997 if( d != 0. )
998 {
999 result = true;
1000 d = 1./d;
1001 double t[9];
1002
1003 t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
1004 t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
1005 t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;
1006
1007 t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
1008 t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
1009 t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;
1010
1011 t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
1012 t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
1013 t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;
1014
1015 Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
1016 Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
1017 Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
1018 }
1019 }
1020 }
1021 else
1022 {
1023 assert( n == 1 );
1024
1025 if( type == CV_32FC1 )
1026 {
1027 double d = Sf(0,0);
1028 if( d != 0. )
1029 {
1030 result = true;
1031 Df(0,0) = (float)(1./d);
1032 }
1033 }
1034 else
1035 {
1036 double d = Sd(0,0);
1037 if( d != 0. )
1038 {
1039 result = true;
1040 Dd(0,0) = 1./d;
1041 }
1042 }
1043 }
1044 if( !result )
1045 dst = Scalar(0);
1046 return result;
1047 }
1048
1049 int elem_size = CV_ELEM_SIZE(type);
1050 AutoBuffer<uchar> buf(n*n*elem_size);
1051 Mat src1(n, n, type, buf.data());
1052 src.copyTo(src1);
1053 setIdentity(dst);
1054
1055 if( method == DECOMP_LU && type == CV_32F )
1056 result = hal::LU32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
1057 else if( method == DECOMP_LU && type == CV_64F )
1058 result = hal::LU64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
1059 else if( method == DECOMP_CHOLESKY && type == CV_32F )
1060 result = hal::Cholesky32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
1061 else
1062 result = hal::Cholesky64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
1063
1064 if( !result )
1065 dst = Scalar(0);
1066
1067 return result;
1068 }
1069
inv(int method) const1070 UMat UMat::inv(int method) const
1071 {
1072 UMat m;
1073 invert(*this, m, method);
1074 return m;
1075 }
1076
1077
1078 /****************************************************************************************\
1079 * Solving a linear system *
1080 \****************************************************************************************/
1081
solve(InputArray _src,InputArray _src2arg,OutputArray _dst,int method)1082 bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
1083 {
1084 CV_INSTRUMENT_REGION();
1085
1086 bool result = true;
1087 Mat src = _src.getMat(), _src2 = _src2arg.getMat();
1088 int type = src.type();
1089 bool is_normal = (method & DECOMP_NORMAL) != 0;
1090
1091 CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );
1092
1093 method &= ~DECOMP_NORMAL;
1094 CV_Check(method, method == DECOMP_LU || method == DECOMP_SVD || method == DECOMP_EIG ||
1095 method == DECOMP_CHOLESKY || method == DECOMP_QR,
1096 "Unsupported method, see #DecompTypes");
1097 CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
1098 is_normal || src.rows == src.cols );
1099
1100 // check case of a single equation and small matrix
1101 if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
1102 src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
1103 {
1104 _dst.create( src.cols, _src2.cols, src.type() );
1105 Mat dst = _dst.getMat();
1106
1107 #define bf(y) ((float*)(bdata + y*src2step))[0]
1108 #define bd(y) ((double*)(bdata + y*src2step))[0]
1109
1110 const uchar* srcdata = src.ptr();
1111 const uchar* bdata = _src2.ptr();
1112 uchar* dstdata = dst.ptr();
1113 size_t srcstep = src.step;
1114 size_t src2step = _src2.step;
1115 size_t dststep = dst.step;
1116
1117 if( src.rows == 2 )
1118 {
1119 if( type == CV_32FC1 )
1120 {
1121 double d = det2(Sf);
1122 if( d != 0. )
1123 {
1124 double t;
1125 d = 1./d;
1126 t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
1127 Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
1128 Df(0,0) = (float)t;
1129 }
1130 else
1131 result = false;
1132 }
1133 else
1134 {
1135 double d = det2(Sd);
1136 if( d != 0. )
1137 {
1138 double t;
1139 d = 1./d;
1140 t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
1141 Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
1142 Dd(0,0) = t;
1143 }
1144 else
1145 result = false;
1146 }
1147 }
1148 else if( src.rows == 3 )
1149 {
1150 if( type == CV_32FC1 )
1151 {
1152 double d = det3(Sf);
1153 if( d != 0. )
1154 {
1155 float t[3];
1156 d = 1./d;
1157
1158 t[0] = (float)(d*
1159 (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
1160 Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
1161 Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));
1162
1163 t[1] = (float)(d*
1164 (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
1165 bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
1166 Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));
1167
1168 t[2] = (float)(d*
1169 (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
1170 Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
1171 bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));
1172
1173 Df(0,0) = t[0];
1174 Df(1,0) = t[1];
1175 Df(2,0) = t[2];
1176 }
1177 else
1178 result = false;
1179 }
1180 else
1181 {
1182 double d = det3(Sd);
1183 if( d != 0. )
1184 {
1185 double t[9];
1186
1187 d = 1./d;
1188
1189 t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
1190 (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
1191 (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;
1192
1193 t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
1194 (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
1195 (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;
1196
1197 t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
1198 (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
1199 (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;
1200
1201 Dd(0,0) = t[0];
1202 Dd(1,0) = t[1];
1203 Dd(2,0) = t[2];
1204 }
1205 else
1206 result = false;
1207 }
1208 }
1209 else
1210 {
1211 assert( src.rows == 1 );
1212
1213 if( type == CV_32FC1 )
1214 {
1215 double d = Sf(0,0);
1216 if( d != 0. )
1217 Df(0,0) = (float)(bf(0)/d);
1218 else
1219 result = false;
1220 }
1221 else
1222 {
1223 double d = Sd(0,0);
1224 if( d != 0. )
1225 Dd(0,0) = (bd(0)/d);
1226 else
1227 result = false;
1228 }
1229 }
1230 return result;
1231 }
1232
1233 int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
1234 size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
1235 size_t vstep = alignSize(n*esz, 16);
1236 size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
1237 AutoBuffer<uchar> buffer;
1238
1239 Mat src2 = _src2;
1240 _dst.create( src.cols, src2.cols, src.type() );
1241 Mat dst = _dst.getMat();
1242
1243 if( m < n )
1244 CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" );
1245
1246 if( m == n )
1247 is_normal = false;
1248 else if( is_normal )
1249 {
1250 m_ = n;
1251 if( method == DECOMP_SVD )
1252 method = DECOMP_EIG;
1253 }
1254
1255 size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
1256 bufsize += asize + 32;
1257
1258 if( is_normal )
1259 bufsize += n*nb*esz;
1260 if( method == DECOMP_SVD || method == DECOMP_EIG )
1261 bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;
1262
1263 buffer.allocate(bufsize);
1264 uchar* ptr = alignPtr(buffer.data(), 16);
1265
1266 Mat a(m_, n, type, ptr, astep);
1267
1268 if( is_normal )
1269 mulTransposed(src, a, true);
1270 else if( method != DECOMP_SVD )
1271 src.copyTo(a);
1272 else
1273 {
1274 a = Mat(n, m_, type, ptr, astep);
1275 transpose(src, a);
1276 }
1277 ptr += asize;
1278
1279 if( !is_normal )
1280 {
1281 if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
1282 src2.copyTo(dst);
1283 }
1284 else
1285 {
1286 // a'*b
1287 if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
1288 gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
1289 else
1290 {
1291 Mat tmp(n, nb, type, ptr);
1292 ptr += n*nb*esz;
1293 gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
1294 src2 = tmp;
1295 }
1296 }
1297
1298 if( method == DECOMP_LU )
1299 {
1300 if( type == CV_32F )
1301 result = hal::LU32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
1302 else
1303 result = hal::LU64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
1304 }
1305 else if( method == DECOMP_CHOLESKY )
1306 {
1307 if( type == CV_32F )
1308 result = hal::Cholesky32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
1309 else
1310 result = hal::Cholesky64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
1311 }
1312 else if( method == DECOMP_QR )
1313 {
1314 Mat rhsMat;
1315 if( is_normal || m == n )
1316 {
1317 src2.copyTo(dst);
1318 rhsMat = dst;
1319 }
1320 else
1321 {
1322 rhsMat = Mat(m, nb, type);
1323 src2.copyTo(rhsMat);
1324 }
1325
1326 if( type == CV_32F )
1327 result = hal::QR32f(a.ptr<float>(), a.step, a.rows, a.cols, rhsMat.cols, rhsMat.ptr<float>(), rhsMat.step, NULL) != 0;
1328 else
1329 result = hal::QR64f(a.ptr<double>(), a.step, a.rows, a.cols, rhsMat.cols, rhsMat.ptr<double>(), rhsMat.step, NULL) != 0;
1330
1331 if (rhsMat.rows != dst.rows)
1332 rhsMat.rowRange(0, dst.rows).copyTo(dst);
1333 }
1334 else
1335 {
1336 ptr = alignPtr(ptr, 16);
1337 Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
1338 ptr += n*(vstep + esz);
1339
1340 if( method == DECOMP_EIG )
1341 {
1342 if( type == CV_32F )
1343 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
1344 else
1345 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1346 u = v;
1347 }
1348 else
1349 {
1350 if( type == CV_32F )
1351 JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
1352 else
1353 JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
1354 u = a;
1355 }
1356
1357 if( type == CV_32F )
1358 {
1359 SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
1360 v.ptr<float>(), v.step, true, src2.ptr<float>(),
1361 src2.step, nb, dst.ptr<float>(), dst.step, ptr);
1362 }
1363 else
1364 {
1365 SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
1366 v.ptr<double>(), v.step, true, src2.ptr<double>(),
1367 src2.step, nb, dst.ptr<double>(), dst.step, ptr);
1368 }
1369 result = true;
1370 }
1371
1372 if( !result )
1373 dst = Scalar(0);
1374
1375 return result;
1376 }
1377
1378
1379 /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
1380
eigen(InputArray _src,OutputArray _evals,OutputArray _evects)1381 bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
1382 {
1383 CV_INSTRUMENT_REGION();
1384
1385 Mat src = _src.getMat();
1386 int type = src.type();
1387 int n = src.rows;
1388
1389 CV_Assert( src.rows == src.cols );
1390 CV_Assert (type == CV_32F || type == CV_64F);
1391
1392 Mat v;
1393 if( _evects.needed() )
1394 {
1395 _evects.create(n, n, type);
1396 v = _evects.getMat();
1397 }
1398
1399 #ifdef HAVE_EIGEN
1400 const bool evecNeeded = _evects.needed();
1401 const int esOptions = evecNeeded ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly;
1402 _evals.create(n, 1, type);
1403 Mat evals = _evals.getMat();
1404 if ( type == CV_64F )
1405 {
1406 Eigen::MatrixXd src_eig, zeros_eig;
1407 cv::cv2eigen(src, src_eig);
1408
1409 Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es;
1410 es.compute(src_eig, esOptions);
1411 if ( es.info() == Eigen::Success )
1412 {
1413 cv::eigen2cv(es.eigenvalues().reverse().eval(), evals);
1414 if ( evecNeeded )
1415 {
1416 cv::Mat evects = _evects.getMat();
1417 cv::eigen2cv(es.eigenvectors().rowwise().reverse().transpose().eval(), v);
1418 }
1419 return true;
1420 }
1421 } else { // CV_32F
1422 Eigen::MatrixXf src_eig, zeros_eig;
1423 cv::cv2eigen(src, src_eig);
1424
1425 Eigen::SelfAdjointEigenSolver<Eigen::MatrixXf> es;
1426 es.compute(src_eig, esOptions);
1427 if ( es.info() == Eigen::Success )
1428 {
1429 cv::eigen2cv(es.eigenvalues().reverse().eval(), evals);
1430 if ( evecNeeded )
1431 {
1432 cv::eigen2cv(es.eigenvectors().rowwise().reverse().transpose().eval(), v);
1433 }
1434 return true;
1435 }
1436 }
1437 return false;
1438 #else
1439
1440 size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
1441 AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
1442 uchar* ptr = alignPtr(buf.data(), 16);
1443 Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
1444 ptr += astep*n + elemSize*n;
1445 src.copyTo(a);
1446 bool ok = type == CV_32F ?
1447 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
1448 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
1449
1450 w.copyTo(_evals);
1451 return ok;
1452 #endif
1453 }
1454
_SVDcompute(InputArray _aarr,OutputArray _w,OutputArray _u,OutputArray _vt,int flags)1455 static void _SVDcompute( InputArray _aarr, OutputArray _w,
1456 OutputArray _u, OutputArray _vt, int flags )
1457 {
1458 Mat src = _aarr.getMat();
1459 int m = src.rows, n = src.cols;
1460 int type = src.type();
1461 bool compute_uv = _u.needed() || _vt.needed();
1462 bool full_uv = (flags & SVD::FULL_UV) != 0;
1463
1464 CV_Assert( type == CV_32F || type == CV_64F );
1465
1466 if( flags & SVD::NO_UV )
1467 {
1468 _u.release();
1469 _vt.release();
1470 compute_uv = full_uv = false;
1471 }
1472
1473 bool at = false;
1474 if( m < n )
1475 {
1476 std::swap(m, n);
1477 at = true;
1478 }
1479
1480 int urows = full_uv ? m : n;
1481 size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
1482 AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
1483 uchar* buf = alignPtr(_buf.data(), 16);
1484 Mat temp_a(n, m, type, buf, astep);
1485 Mat temp_w(n, 1, type, buf + urows*astep);
1486 Mat temp_u(urows, m, type, buf, astep), temp_v;
1487
1488 if( compute_uv )
1489 temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);
1490
1491 if( urows > n )
1492 temp_u = Scalar::all(0);
1493
1494 if( !at )
1495 transpose(src, temp_a);
1496 else
1497 src.copyTo(temp_a);
1498
1499 if( type == CV_32F )
1500 {
1501 JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
1502 temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
1503 }
1504 else
1505 {
1506 JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
1507 temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
1508 }
1509 temp_w.copyTo(_w);
1510 if( compute_uv )
1511 {
1512 if( !at )
1513 {
1514 if( _u.needed() )
1515 transpose(temp_u, _u);
1516 if( _vt.needed() )
1517 temp_v.copyTo(_vt);
1518 }
1519 else
1520 {
1521 if( _u.needed() )
1522 transpose(temp_v, _u);
1523 if( _vt.needed() )
1524 temp_u.copyTo(_vt);
1525 }
1526 }
1527 }
1528
1529
compute(InputArray a,OutputArray w,OutputArray u,OutputArray vt,int flags)1530 void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
1531 {
1532 CV_INSTRUMENT_REGION();
1533
1534 _SVDcompute(a, w, u, vt, flags);
1535 }
1536
compute(InputArray a,OutputArray w,int flags)1537 void SVD::compute( InputArray a, OutputArray w, int flags )
1538 {
1539 CV_INSTRUMENT_REGION();
1540
1541 _SVDcompute(a, w, noArray(), noArray(), flags);
1542 }
1543
backSubst(InputArray _w,InputArray _u,InputArray _vt,InputArray _rhs,OutputArray _dst)1544 void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
1545 InputArray _rhs, OutputArray _dst )
1546 {
1547 Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
1548 int type = w.type(), esz = (int)w.elemSize();
1549 int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
1550 size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
1551 AutoBuffer<uchar> buffer(nb*sizeof(double) + 16);
1552 CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
1553 CV_Assert( u.cols >= nm && vt.rows >= nm &&
1554 (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
1555 CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );
1556
1557 _dst.create( n, nb, type );
1558 Mat dst = _dst.getMat();
1559 if( type == CV_32F )
1560 SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
1561 vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
1562 dst.ptr<float>(), dst.step, buffer.data());
1563 else if( type == CV_64F )
1564 SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
1565 vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
1566 dst.ptr<double>(), dst.step, buffer.data());
1567 else
1568 CV_Error( CV_StsUnsupportedFormat, "" );
1569 }
1570
1571
operator ()(InputArray a,int flags)1572 SVD& SVD::operator ()(InputArray a, int flags)
1573 {
1574 _SVDcompute(a, w, u, vt, flags);
1575 return *this;
1576 }
1577
1578
backSubst(InputArray rhs,OutputArray dst) const1579 void SVD::backSubst( InputArray rhs, OutputArray dst ) const
1580 {
1581 backSubst( w, u, vt, rhs, dst );
1582 }
1583
1584 }
1585
1586
SVDecomp(InputArray src,OutputArray w,OutputArray u,OutputArray vt,int flags)1587 void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
1588 {
1589 CV_INSTRUMENT_REGION();
1590
1591 SVD::compute(src, w, u, vt, flags);
1592 }
1593
SVBackSubst(InputArray w,InputArray u,InputArray vt,InputArray rhs,OutputArray dst)1594 void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
1595 {
1596 CV_INSTRUMENT_REGION();
1597
1598 SVD::backSubst(w, u, vt, rhs, dst);
1599 }
1600
1601
1602
1603 #ifndef OPENCV_EXCLUDE_C_API
1604
1605 CV_IMPL double
cvDet(const CvArr * arr)1606 cvDet( const CvArr* arr )
1607 {
1608 if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
1609 {
1610 CvMat* mat = (CvMat*)arr;
1611 int type = CV_MAT_TYPE(mat->type);
1612 int rows = mat->rows;
1613 uchar* m = mat->data.ptr;
1614 int step = mat->step;
1615 CV_Assert( rows == mat->cols );
1616
1617 #define Mf(y, x) ((float*)(m + y*step))[x]
1618 #define Md(y, x) ((double*)(m + y*step))[x]
1619
1620 if( type == CV_32F )
1621 {
1622 if( rows == 2 )
1623 return det2(Mf);
1624 if( rows == 3 )
1625 return det3(Mf);
1626 }
1627 else if( type == CV_64F )
1628 {
1629 if( rows == 2 )
1630 return det2(Md);
1631 if( rows == 3 )
1632 return det3(Md);
1633 }
1634 }
1635 return cv::determinant(cv::cvarrToMat(arr));
1636 }
1637
1638
1639 CV_IMPL double
cvInvert(const CvArr * srcarr,CvArr * dstarr,int method)1640 cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
1641 {
1642 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1643
1644 CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
1645 return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1646 method == CV_SVD ? cv::DECOMP_SVD :
1647 method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
1648 }
1649
1650
1651 CV_IMPL int
cvSolve(const CvArr * Aarr,const CvArr * barr,CvArr * xarr,int method)1652 cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
1653 {
1654 cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);
1655
1656 CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
1657 bool is_normal = (method & CV_NORMAL) != 0;
1658 method &= ~CV_NORMAL;
1659 return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
1660 method == CV_SVD ? cv::DECOMP_SVD :
1661 method == CV_SVD_SYM ? cv::DECOMP_EIG :
1662 A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
1663 }
1664
1665
1666 CV_IMPL void
cvEigenVV(CvArr * srcarr,CvArr * evectsarr,CvArr * evalsarr,double,int,int)1667 cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
1668 int, int )
1669 {
1670 cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
1671 if( evectsarr )
1672 {
1673 cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
1674 eigen(src, evals, evects);
1675 if( evects0.data != evects.data )
1676 {
1677 const uchar* p = evects0.ptr();
1678 evects.convertTo(evects0, evects0.type());
1679 CV_Assert( p == evects0.ptr() );
1680 }
1681 }
1682 else
1683 eigen(src, evals);
1684 if( evals0.data != evals.data )
1685 {
1686 const uchar* p = evals0.ptr();
1687 if( evals0.size() == evals.size() )
1688 evals.convertTo(evals0, evals0.type());
1689 else if( evals0.type() == evals.type() )
1690 cv::transpose(evals, evals0);
1691 else
1692 cv::Mat(evals.t()).convertTo(evals0, evals0.type());
1693 CV_Assert( p == evals0.ptr() );
1694 }
1695 }
1696
1697
1698 CV_IMPL void
cvSVD(CvArr * aarr,CvArr * warr,CvArr * uarr,CvArr * varr,int flags)1699 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
1700 {
1701 cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
1702 int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);
1703
1704 CV_Assert( w.type() == type &&
1705 (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
1706 w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );
1707
1708 cv::SVD svd;
1709
1710 if( w.size() == cv::Size(nm, 1) )
1711 svd.w = cv::Mat(nm, 1, type, w.ptr() );
1712 else if( w.isContinuous() )
1713 svd.w = w;
1714
1715 if( uarr )
1716 {
1717 u = cv::cvarrToMat(uarr);
1718 CV_Assert( u.type() == type );
1719 svd.u = u;
1720 }
1721
1722 if( varr )
1723 {
1724 v = cv::cvarrToMat(varr);
1725 CV_Assert( v.type() == type );
1726 svd.vt = v;
1727 }
1728
1729 svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
1730 ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
1731 ((m != n && (svd.u.size() == cv::Size(mn, mn) ||
1732 svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));
1733
1734 if( !u.empty() )
1735 {
1736 if( flags & CV_SVD_U_T )
1737 cv::transpose( svd.u, u );
1738 else if( u.data != svd.u.data )
1739 {
1740 CV_Assert( u.size() == svd.u.size() );
1741 svd.u.copyTo(u);
1742 }
1743 }
1744
1745 if( !v.empty() )
1746 {
1747 if( !(flags & CV_SVD_V_T) )
1748 cv::transpose( svd.vt, v );
1749 else if( v.data != svd.vt.data )
1750 {
1751 CV_Assert( v.size() == svd.vt.size() );
1752 svd.vt.copyTo(v);
1753 }
1754 }
1755
1756 if( w.data != svd.w.data )
1757 {
1758 if( w.size() == svd.w.size() )
1759 svd.w.copyTo(w);
1760 else
1761 {
1762 w = cv::Scalar(0);
1763 cv::Mat wd = w.diag();
1764 svd.w.copyTo(wd);
1765 }
1766 }
1767 }
1768
1769
1770 CV_IMPL void
cvSVBkSb(const CvArr * warr,const CvArr * uarr,const CvArr * varr,const CvArr * rhsarr,CvArr * dstarr,int flags)1771 cvSVBkSb( const CvArr* warr, const CvArr* uarr,
1772 const CvArr* varr, const CvArr* rhsarr,
1773 CvArr* dstarr, int flags )
1774 {
1775 cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
1776 v = cv::cvarrToMat(varr), rhs,
1777 dst = cv::cvarrToMat(dstarr), dst0 = dst;
1778 if( flags & CV_SVD_U_T )
1779 {
1780 cv::Mat tmp;
1781 transpose(u, tmp);
1782 u = tmp;
1783 }
1784 if( !(flags & CV_SVD_V_T) )
1785 {
1786 cv::Mat tmp;
1787 transpose(v, tmp);
1788 v = tmp;
1789 }
1790 if( rhsarr )
1791 rhs = cv::cvarrToMat(rhsarr);
1792
1793 cv::SVD::backSubst(w, u, v, rhs, dst);
1794 CV_Assert( dst.data == dst0.data );
1795 }
1796 #endif // OPENCV_EXCLUDE_C_API
1797