1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
4 
5 
6 #include "precomp.hpp"
7 
8 namespace cv { namespace hal {
9 
10 /****************************************************************************************\
11 *                     LU & Cholesky implementation for small matrices                    *
12 \****************************************************************************************/
13 
14 template<typename _Tp> static inline int
LUImpl(_Tp * A,size_t astep,int m,_Tp * b,size_t bstep,int n,_Tp eps)15 LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
16 {
17     int i, j, k, p = 1;
18     astep /= sizeof(A[0]);
19     bstep /= sizeof(b[0]);
20 
21     for( i = 0; i < m; i++ )
22     {
23         k = i;
24 
25         for( j = i+1; j < m; j++ )
26             if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )
27                 k = j;
28 
29         if( std::abs(A[k*astep + i]) < eps )
30             return 0;
31 
32         if( k != i )
33         {
34             for( j = i; j < m; j++ )
35                 std::swap(A[i*astep + j], A[k*astep + j]);
36             if( b )
37                 for( j = 0; j < n; j++ )
38                     std::swap(b[i*bstep + j], b[k*bstep + j]);
39             p = -p;
40         }
41 
42         _Tp d = -1/A[i*astep + i];
43 
44         for( j = i+1; j < m; j++ )
45         {
46             _Tp alpha = A[j*astep + i]*d;
47 
48             for( k = i+1; k < m; k++ )
49                 A[j*astep + k] += alpha*A[i*astep + k];
50 
51             if( b )
52                 for( k = 0; k < n; k++ )
53                     b[j*bstep + k] += alpha*b[i*bstep + k];
54         }
55     }
56 
57     if( b )
58     {
59         for( i = m-1; i >= 0; i-- )
60             for( j = 0; j < n; j++ )
61             {
62                 _Tp s = b[i*bstep + j];
63                 for( k = i+1; k < m; k++ )
64                     s -= A[i*astep + k]*b[k*bstep + j];
65                 b[i*bstep + j] = s/A[i*astep + i];
66             }
67     }
68 
69     return p;
70 }
71 
72 
LU32f(float * A,size_t astep,int m,float * b,size_t bstep,int n)73 int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
74 {
75     CV_INSTRUMENT_REGION();
76 
77     int output;
78     CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)
79     output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
80     return output;
81 }
82 
83 
LU64f(double * A,size_t astep,int m,double * b,size_t bstep,int n)84 int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
85 {
86     CV_INSTRUMENT_REGION();
87 
88     int output;
89     CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)
90     output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
91     return output;
92 }
93 
94 template<typename _Tp> static inline bool
CholImpl(_Tp * A,size_t astep,int m,_Tp * b,size_t bstep,int n)95 CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
96 {
97     _Tp* L = A;
98     int i, j, k;
99     double s;
100     astep /= sizeof(A[0]);
101     bstep /= sizeof(b[0]);
102 
103     for( i = 0; i < m; i++ )
104     {
105         for( j = 0; j < i; j++ )
106         {
107             s = A[i*astep + j];
108             for( k = 0; k < j; k++ )
109                 s -= L[i*astep + k]*L[j*astep + k];
110             L[i*astep + j] = (_Tp)(s*L[j*astep + j]);
111         }
112         s = A[i*astep + i];
113         for( k = 0; k < j; k++ )
114         {
115             double t = L[i*astep + k];
116             s -= t*t;
117         }
118         if( s < std::numeric_limits<_Tp>::epsilon() )
119             return false;
120         L[i*astep + i] = (_Tp)(1./std::sqrt(s));
121     }
122 
123     if (!b)
124     {
125         for( i = 0; i < m; i++ )
126              L[i*astep + i]=1/L[i*astep + i];
127         return true;
128    }
129 
130     // LLt x = b
131     // 1: L y = b
132     // 2. Lt x = y
133 
134     /*
135      [ L00             ]  y0   b0
136      [ L10 L11         ]  y1 = b1
137      [ L20 L21 L22     ]  y2   b2
138      [ L30 L31 L32 L33 ]  y3   b3
139 
140      [ L00 L10 L20 L30 ]  x0   y0
141      [     L11 L21 L31 ]  x1 = y1
142      [         L22 L32 ]  x2   y2
143      [             L33 ]  x3   y3
144      */
145 
146     for( i = 0; i < m; i++ )
147     {
148         for( j = 0; j < n; j++ )
149         {
150             s = b[i*bstep + j];
151             for( k = 0; k < i; k++ )
152                 s -= L[i*astep + k]*b[k*bstep + j];
153             b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
154         }
155     }
156 
157     for( i = m-1; i >= 0; i-- )
158     {
159         for( j = 0; j < n; j++ )
160         {
161             s = b[i*bstep + j];
162             for( k = m-1; k > i; k-- )
163                 s -= L[k*astep + i]*b[k*bstep + j];
164             b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
165         }
166     }
167     for( i = 0; i < m; i++ )
168             L[i*astep + i]=1/L[i*astep + i];
169 
170     return true;
171 }
172 
Cholesky32f(float * A,size_t astep,int m,float * b,size_t bstep,int n)173 bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
174 {
175     CV_INSTRUMENT_REGION();
176 
177     bool output;
178     CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)
179     return CholImpl(A, astep, m, b, bstep, n);
180 }
181 
Cholesky64f(double * A,size_t astep,int m,double * b,size_t bstep,int n)182 bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
183 {
184     CV_INSTRUMENT_REGION();
185 
186     bool output;
187     CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)
188     return CholImpl(A, astep, m, b, bstep, n);
189 }
190 
191 template<typename _Tp> inline static int
sign(_Tp x)192 sign(_Tp x)
193 {
194     if (x >= (_Tp)0)
195         return 1;
196     else
197         return -1;
198 }
199 
200 template<typename _Tp> static inline int
QRImpl(_Tp * A,size_t astep,int m,int n,int k,_Tp * b,size_t bstep,_Tp * hFactors,_Tp eps)201 QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFactors, _Tp eps)
202 {
203     astep /= sizeof(_Tp);
204     bstep /= sizeof(_Tp);
205 
206     cv::AutoBuffer<_Tp> buffer;
207     size_t buf_size = m ? m + n : hFactors != NULL;
208     buffer.allocate(buf_size);
209     _Tp* vl = buffer.data();
210     if (hFactors == NULL)
211         hFactors = vl + m;
212 
213     for (int l = 0; l < n; l++)
214     {
215         //generate vl
216         int vlSize = m - l;
217         _Tp vlNorm = (_Tp)0;
218         for (int i = 0; i < vlSize; i++)
219         {
220             vl[i] = A[(l + i)*astep + l];
221             vlNorm += vl[i] * vl[i];
222         }
223         _Tp tmpV = vl[0];
224         vl[0] = vl[0] + sign(vl[0])*std::sqrt(vlNorm);
225         vlNorm = std::sqrt(vlNorm + vl[0] * vl[0] - tmpV*tmpV);
226         for (int i = 0; i < vlSize; i++)
227         {
228             vl[i] /= vlNorm;
229         }
230         //multiply A_l*vl
231         for (int j = l; j < n; j++)
232         {
233             _Tp v_lA = (_Tp)0;
234             for (int i = l; i < m; i++)
235             {
236                 v_lA += vl[i - l] * A[i*astep + j];
237             }
238 
239             for (int i = l; i < m; i++)
240             {
241                 A[i*astep + j] -= 2 * vl[i - l] * v_lA;
242             }
243         }
244 
245         //save vl and factors
246         hFactors[l] = vl[0] * vl[0];
247         for (int i = 1; i < vlSize; i++)
248         {
249             A[(l + i)*astep + l] = vl[i] / vl[0];
250         }
251     }
252 
253     if (b)
254     {
255         //generate new rhs
256         for (int l = 0; l < n; l++)
257         {
258             //unpack vl
259             vl[0] = (_Tp)1;
260             for (int j = 1; j < m - l; j++)
261             {
262               vl[j] = A[(j + l)*astep + l];
263             }
264 
265             //h_l*x
266             for (int j = 0; j < k; j++)
267             {
268                 _Tp v_lB = (_Tp)0;
269                 for (int i = l; i < m; i++)
270                   v_lB += vl[i - l] * b[i*bstep + j];
271 
272                 for (int i = l; i < m; i++)
273                     b[i*bstep + j] -= 2 * vl[i - l] * v_lB * hFactors[l];
274             }
275         }
276         //do back substitution
277         for (int i = n - 1; i >= 0; i--)
278         {
279             for (int j = n - 1; j > i; j--)
280             {
281                 for (int p = 0; p < k; p++)
282                     b[i*bstep + p] -= b[j*bstep + p] * A[i*astep + j];
283             }
284             if (std::abs(A[i*astep + i]) < eps)
285                 return 0;
286             for (int p = 0; p < k; p++)
287                 b[i*bstep + p] /= A[i*astep + i];
288         }
289     }
290 
291     return 1;
292 }
293 
QR32f(float * A,size_t astep,int m,int n,int k,float * b,size_t bstep,float * hFactors)294 int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors)
295 {
296     CV_INSTRUMENT_REGION();
297 
298     int output;
299     CALL_HAL_RET(QR32f, cv_hal_QR32f, output, A, astep, m, n, k, b, bstep, hFactors);
300     output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, FLT_EPSILON * 10);
301     return output;
302 }
303 
QR64f(double * A,size_t astep,int m,int n,int k,double * b,size_t bstep,double * hFactors)304 int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors)
305 {
306     CV_INSTRUMENT_REGION();
307 
308     int output;
309     CALL_HAL_RET(QR64f, cv_hal_QR64f, output, A, astep, m, n, k, b, bstep, hFactors)
310     output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, DBL_EPSILON * 100);
311     return output;
312 }
313 
314 //=============================================================================
315 // for compatibility with 3.0
316 
LU(float * A,size_t astep,int m,float * b,size_t bstep,int n)317 int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
318 {
319     return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
320 }
321 
LU(double * A,size_t astep,int m,double * b,size_t bstep,int n)322 int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
323 {
324     return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
325 }
326 
Cholesky(float * A,size_t astep,int m,float * b,size_t bstep,int n)327 bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
328 {
329     return CholImpl(A, astep, m, b, bstep, n);
330 }
331 
Cholesky(double * A,size_t astep,int m,double * b,size_t bstep,int n)332 bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
333 {
334     return CholImpl(A, astep, m, b, bstep, n);
335 }
336 
337 }} // cv::hal::
338