1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
4
5
6 #include "precomp.hpp"
7
8 namespace cv { namespace hal {
9
10 /****************************************************************************************\
11 * LU & Cholesky implementation for small matrices *
12 \****************************************************************************************/
13
14 template<typename _Tp> static inline int
LUImpl(_Tp * A,size_t astep,int m,_Tp * b,size_t bstep,int n,_Tp eps)15 LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
16 {
17 int i, j, k, p = 1;
18 astep /= sizeof(A[0]);
19 bstep /= sizeof(b[0]);
20
21 for( i = 0; i < m; i++ )
22 {
23 k = i;
24
25 for( j = i+1; j < m; j++ )
26 if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )
27 k = j;
28
29 if( std::abs(A[k*astep + i]) < eps )
30 return 0;
31
32 if( k != i )
33 {
34 for( j = i; j < m; j++ )
35 std::swap(A[i*astep + j], A[k*astep + j]);
36 if( b )
37 for( j = 0; j < n; j++ )
38 std::swap(b[i*bstep + j], b[k*bstep + j]);
39 p = -p;
40 }
41
42 _Tp d = -1/A[i*astep + i];
43
44 for( j = i+1; j < m; j++ )
45 {
46 _Tp alpha = A[j*astep + i]*d;
47
48 for( k = i+1; k < m; k++ )
49 A[j*astep + k] += alpha*A[i*astep + k];
50
51 if( b )
52 for( k = 0; k < n; k++ )
53 b[j*bstep + k] += alpha*b[i*bstep + k];
54 }
55 }
56
57 if( b )
58 {
59 for( i = m-1; i >= 0; i-- )
60 for( j = 0; j < n; j++ )
61 {
62 _Tp s = b[i*bstep + j];
63 for( k = i+1; k < m; k++ )
64 s -= A[i*astep + k]*b[k*bstep + j];
65 b[i*bstep + j] = s/A[i*astep + i];
66 }
67 }
68
69 return p;
70 }
71
72
LU32f(float * A,size_t astep,int m,float * b,size_t bstep,int n)73 int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
74 {
75 CV_INSTRUMENT_REGION();
76
77 int output;
78 CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)
79 output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
80 return output;
81 }
82
83
LU64f(double * A,size_t astep,int m,double * b,size_t bstep,int n)84 int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
85 {
86 CV_INSTRUMENT_REGION();
87
88 int output;
89 CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)
90 output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
91 return output;
92 }
93
94 template<typename _Tp> static inline bool
CholImpl(_Tp * A,size_t astep,int m,_Tp * b,size_t bstep,int n)95 CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
96 {
97 _Tp* L = A;
98 int i, j, k;
99 double s;
100 astep /= sizeof(A[0]);
101 bstep /= sizeof(b[0]);
102
103 for( i = 0; i < m; i++ )
104 {
105 for( j = 0; j < i; j++ )
106 {
107 s = A[i*astep + j];
108 for( k = 0; k < j; k++ )
109 s -= L[i*astep + k]*L[j*astep + k];
110 L[i*astep + j] = (_Tp)(s*L[j*astep + j]);
111 }
112 s = A[i*astep + i];
113 for( k = 0; k < j; k++ )
114 {
115 double t = L[i*astep + k];
116 s -= t*t;
117 }
118 if( s < std::numeric_limits<_Tp>::epsilon() )
119 return false;
120 L[i*astep + i] = (_Tp)(1./std::sqrt(s));
121 }
122
123 if (!b)
124 {
125 for( i = 0; i < m; i++ )
126 L[i*astep + i]=1/L[i*astep + i];
127 return true;
128 }
129
130 // LLt x = b
131 // 1: L y = b
132 // 2. Lt x = y
133
134 /*
135 [ L00 ] y0 b0
136 [ L10 L11 ] y1 = b1
137 [ L20 L21 L22 ] y2 b2
138 [ L30 L31 L32 L33 ] y3 b3
139
140 [ L00 L10 L20 L30 ] x0 y0
141 [ L11 L21 L31 ] x1 = y1
142 [ L22 L32 ] x2 y2
143 [ L33 ] x3 y3
144 */
145
146 for( i = 0; i < m; i++ )
147 {
148 for( j = 0; j < n; j++ )
149 {
150 s = b[i*bstep + j];
151 for( k = 0; k < i; k++ )
152 s -= L[i*astep + k]*b[k*bstep + j];
153 b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
154 }
155 }
156
157 for( i = m-1; i >= 0; i-- )
158 {
159 for( j = 0; j < n; j++ )
160 {
161 s = b[i*bstep + j];
162 for( k = m-1; k > i; k-- )
163 s -= L[k*astep + i]*b[k*bstep + j];
164 b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
165 }
166 }
167 for( i = 0; i < m; i++ )
168 L[i*astep + i]=1/L[i*astep + i];
169
170 return true;
171 }
172
Cholesky32f(float * A,size_t astep,int m,float * b,size_t bstep,int n)173 bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
174 {
175 CV_INSTRUMENT_REGION();
176
177 bool output;
178 CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)
179 return CholImpl(A, astep, m, b, bstep, n);
180 }
181
Cholesky64f(double * A,size_t astep,int m,double * b,size_t bstep,int n)182 bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
183 {
184 CV_INSTRUMENT_REGION();
185
186 bool output;
187 CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)
188 return CholImpl(A, astep, m, b, bstep, n);
189 }
190
191 template<typename _Tp> inline static int
sign(_Tp x)192 sign(_Tp x)
193 {
194 if (x >= (_Tp)0)
195 return 1;
196 else
197 return -1;
198 }
199
200 template<typename _Tp> static inline int
QRImpl(_Tp * A,size_t astep,int m,int n,int k,_Tp * b,size_t bstep,_Tp * hFactors,_Tp eps)201 QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFactors, _Tp eps)
202 {
203 astep /= sizeof(_Tp);
204 bstep /= sizeof(_Tp);
205
206 cv::AutoBuffer<_Tp> buffer;
207 size_t buf_size = m ? m + n : hFactors != NULL;
208 buffer.allocate(buf_size);
209 _Tp* vl = buffer.data();
210 if (hFactors == NULL)
211 hFactors = vl + m;
212
213 for (int l = 0; l < n; l++)
214 {
215 //generate vl
216 int vlSize = m - l;
217 _Tp vlNorm = (_Tp)0;
218 for (int i = 0; i < vlSize; i++)
219 {
220 vl[i] = A[(l + i)*astep + l];
221 vlNorm += vl[i] * vl[i];
222 }
223 _Tp tmpV = vl[0];
224 vl[0] = vl[0] + sign(vl[0])*std::sqrt(vlNorm);
225 vlNorm = std::sqrt(vlNorm + vl[0] * vl[0] - tmpV*tmpV);
226 for (int i = 0; i < vlSize; i++)
227 {
228 vl[i] /= vlNorm;
229 }
230 //multiply A_l*vl
231 for (int j = l; j < n; j++)
232 {
233 _Tp v_lA = (_Tp)0;
234 for (int i = l; i < m; i++)
235 {
236 v_lA += vl[i - l] * A[i*astep + j];
237 }
238
239 for (int i = l; i < m; i++)
240 {
241 A[i*astep + j] -= 2 * vl[i - l] * v_lA;
242 }
243 }
244
245 //save vl and factors
246 hFactors[l] = vl[0] * vl[0];
247 for (int i = 1; i < vlSize; i++)
248 {
249 A[(l + i)*astep + l] = vl[i] / vl[0];
250 }
251 }
252
253 if (b)
254 {
255 //generate new rhs
256 for (int l = 0; l < n; l++)
257 {
258 //unpack vl
259 vl[0] = (_Tp)1;
260 for (int j = 1; j < m - l; j++)
261 {
262 vl[j] = A[(j + l)*astep + l];
263 }
264
265 //h_l*x
266 for (int j = 0; j < k; j++)
267 {
268 _Tp v_lB = (_Tp)0;
269 for (int i = l; i < m; i++)
270 v_lB += vl[i - l] * b[i*bstep + j];
271
272 for (int i = l; i < m; i++)
273 b[i*bstep + j] -= 2 * vl[i - l] * v_lB * hFactors[l];
274 }
275 }
276 //do back substitution
277 for (int i = n - 1; i >= 0; i--)
278 {
279 for (int j = n - 1; j > i; j--)
280 {
281 for (int p = 0; p < k; p++)
282 b[i*bstep + p] -= b[j*bstep + p] * A[i*astep + j];
283 }
284 if (std::abs(A[i*astep + i]) < eps)
285 return 0;
286 for (int p = 0; p < k; p++)
287 b[i*bstep + p] /= A[i*astep + i];
288 }
289 }
290
291 return 1;
292 }
293
QR32f(float * A,size_t astep,int m,int n,int k,float * b,size_t bstep,float * hFactors)294 int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors)
295 {
296 CV_INSTRUMENT_REGION();
297
298 int output;
299 CALL_HAL_RET(QR32f, cv_hal_QR32f, output, A, astep, m, n, k, b, bstep, hFactors);
300 output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, FLT_EPSILON * 10);
301 return output;
302 }
303
QR64f(double * A,size_t astep,int m,int n,int k,double * b,size_t bstep,double * hFactors)304 int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors)
305 {
306 CV_INSTRUMENT_REGION();
307
308 int output;
309 CALL_HAL_RET(QR64f, cv_hal_QR64f, output, A, astep, m, n, k, b, bstep, hFactors)
310 output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, DBL_EPSILON * 100);
311 return output;
312 }
313
314 //=============================================================================
315 // for compatibility with 3.0
316
LU(float * A,size_t astep,int m,float * b,size_t bstep,int n)317 int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
318 {
319 return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
320 }
321
LU(double * A,size_t astep,int m,double * b,size_t bstep,int n)322 int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
323 {
324 return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
325 }
326
Cholesky(float * A,size_t astep,int m,float * b,size_t bstep,int n)327 bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
328 {
329 return CholImpl(A, astep, m, b, bstep, n);
330 }
331
Cholesky(double * A,size_t astep,int m,double * b,size_t bstep,int n)332 bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
333 {
334 return CholImpl(A, astep, m, b, bstep, n);
335 }
336
337 }} // cv::hal::
338