/*************************************************************************
ALGLIB 3.18.0 (source code generated 2021-10-25)
Copyright (c) Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
20 #ifdef _MSC_VER
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 #include "stdafx.h"
24 #include "linalg.h"
25 
26 // disable some irrelevant warnings
27 #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
28 #pragma warning(disable:4100)
29 #pragma warning(disable:4127)
30 #pragma warning(disable:4611)
31 #pragma warning(disable:4702)
32 #pragma warning(disable:4996)
33 #endif
34 
35 /////////////////////////////////////////////////////////////////////////
36 //
37 // THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
38 //
39 /////////////////////////////////////////////////////////////////////////
40 namespace alglib
41 {
42 
43 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
44 
45 #endif
46 
47 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
48 
49 #endif
50 
51 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
52 
53 #endif
54 
55 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
56 
57 #endif
58 
59 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
60 
61 #endif
62 
63 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
64 
65 #endif
66 
67 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
68 
69 #endif
70 
71 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
72 
73 #endif
74 
75 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
76 
77 #endif
78 
79 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
80 
81 #endif
82 
83 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
84 
85 #endif
86 
87 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
88 
89 #endif
90 
91 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
92 
93 #endif
94 
95 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
96 
97 #endif
98 
99 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
100 
101 #endif
102 
103 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
104 
105 #endif
106 
107 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
108 
109 #endif
110 
111 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
112 
113 #endif
114 
115 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
116 
117 #endif
118 
119 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
120 
121 #endif
122 
123 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
124 
125 #endif
126 
127 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
128 /*************************************************************************
129 Cache-oblivous complex "copy-and-transpose"
130 
131 Input parameters:
132     M   -   number of rows
133     N   -   number of columns
134     A   -   source matrix, MxN submatrix is copied and transposed
135     IA  -   submatrix offset (row index)
136     JA  -   submatrix offset (column index)
137     B   -   destination matrix, must be large enough to store result
138     IB  -   submatrix offset (row index)
139     JB  -   submatrix offset (column index)
140 *************************************************************************/
cmatrixtranspose(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,complex_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)141 void cmatrixtranspose(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
142 {
143     jmp_buf _break_jump;
144     alglib_impl::ae_state _alglib_env_state;
145     alglib_impl::ae_state_init(&_alglib_env_state);
146     if( setjmp(_break_jump) )
147     {
148 #if !defined(AE_NO_EXCEPTIONS)
149         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
150 #else
151         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
152         return;
153 #endif
154     }
155     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
156     if( _xparams.flags!=0x0 )
157         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
158     alglib_impl::cmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
159     alglib_impl::ae_state_clear(&_alglib_env_state);
160     return;
161 }
162 
163 /*************************************************************************
164 Cache-oblivous real "copy-and-transpose"
165 
166 Input parameters:
167     M   -   number of rows
168     N   -   number of columns
169     A   -   source matrix, MxN submatrix is copied and transposed
170     IA  -   submatrix offset (row index)
171     JA  -   submatrix offset (column index)
172     B   -   destination matrix, must be large enough to store result
173     IB  -   submatrix offset (row index)
174     JB  -   submatrix offset (column index)
175 *************************************************************************/
rmatrixtranspose(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const real_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)176 void rmatrixtranspose(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
177 {
178     jmp_buf _break_jump;
179     alglib_impl::ae_state _alglib_env_state;
180     alglib_impl::ae_state_init(&_alglib_env_state);
181     if( setjmp(_break_jump) )
182     {
183 #if !defined(AE_NO_EXCEPTIONS)
184         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
185 #else
186         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
187         return;
188 #endif
189     }
190     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
191     if( _xparams.flags!=0x0 )
192         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
193     alglib_impl::rmatrixtranspose(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
194     alglib_impl::ae_state_clear(&_alglib_env_state);
195     return;
196 }
197 
198 /*************************************************************************
199 This code enforces symmetricy of the matrix by copying Upper part to lower
200 one (or vice versa).
201 
202 INPUT PARAMETERS:
203     A   -   matrix
204     N   -   number of rows/columns
205     IsUpper - whether we want to copy upper triangle to lower one (True)
206             or vice versa (False).
207 *************************************************************************/
rmatrixenforcesymmetricity(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)208 void rmatrixenforcesymmetricity(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
209 {
210     jmp_buf _break_jump;
211     alglib_impl::ae_state _alglib_env_state;
212     alglib_impl::ae_state_init(&_alglib_env_state);
213     if( setjmp(_break_jump) )
214     {
215 #if !defined(AE_NO_EXCEPTIONS)
216         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
217 #else
218         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
219         return;
220 #endif
221     }
222     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
223     if( _xparams.flags!=0x0 )
224         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
225     alglib_impl::rmatrixenforcesymmetricity(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
226     alglib_impl::ae_state_clear(&_alglib_env_state);
227     return;
228 }
229 
230 /*************************************************************************
231 Copy
232 
233 Input parameters:
234     M   -   number of rows
235     N   -   number of columns
236     A   -   source matrix, MxN submatrix is copied and transposed
237     IA  -   submatrix offset (row index)
238     JA  -   submatrix offset (column index)
239     B   -   destination matrix, must be large enough to store result
240     IB  -   submatrix offset (row index)
241     JB  -   submatrix offset (column index)
242 *************************************************************************/
cmatrixcopy(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,complex_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)243 void cmatrixcopy(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
244 {
245     jmp_buf _break_jump;
246     alglib_impl::ae_state _alglib_env_state;
247     alglib_impl::ae_state_init(&_alglib_env_state);
248     if( setjmp(_break_jump) )
249     {
250 #if !defined(AE_NO_EXCEPTIONS)
251         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
252 #else
253         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
254         return;
255 #endif
256     }
257     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
258     if( _xparams.flags!=0x0 )
259         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
260     alglib_impl::cmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
261     alglib_impl::ae_state_clear(&_alglib_env_state);
262     return;
263 }
264 
265 /*************************************************************************
266 Copy
267 
268 Input parameters:
269     N   -   subvector size
270     A   -   source vector, N elements are copied
271     IA  -   source offset (first element index)
272     B   -   destination vector, must be large enough to store result
273     IB  -   destination offset (first element index)
274 *************************************************************************/
rvectorcopy(const ae_int_t n,const real_1d_array & a,const ae_int_t ia,const real_1d_array & b,const ae_int_t ib,const xparams _xparams)275 void rvectorcopy(const ae_int_t n, const real_1d_array &a, const ae_int_t ia, const real_1d_array &b, const ae_int_t ib, const xparams _xparams)
276 {
277     jmp_buf _break_jump;
278     alglib_impl::ae_state _alglib_env_state;
279     alglib_impl::ae_state_init(&_alglib_env_state);
280     if( setjmp(_break_jump) )
281     {
282 #if !defined(AE_NO_EXCEPTIONS)
283         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
284 #else
285         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
286         return;
287 #endif
288     }
289     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
290     if( _xparams.flags!=0x0 )
291         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
292     alglib_impl::rvectorcopy(n, const_cast<alglib_impl::ae_vector*>(a.c_ptr()), ia, const_cast<alglib_impl::ae_vector*>(b.c_ptr()), ib, &_alglib_env_state);
293     alglib_impl::ae_state_clear(&_alglib_env_state);
294     return;
295 }
296 
297 /*************************************************************************
298 Copy
299 
300 Input parameters:
301     M   -   number of rows
302     N   -   number of columns
303     A   -   source matrix, MxN submatrix is copied and transposed
304     IA  -   submatrix offset (row index)
305     JA  -   submatrix offset (column index)
306     B   -   destination matrix, must be large enough to store result
307     IB  -   submatrix offset (row index)
308     JB  -   submatrix offset (column index)
309 *************************************************************************/
rmatrixcopy(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const real_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)310 void rmatrixcopy(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
311 {
312     jmp_buf _break_jump;
313     alglib_impl::ae_state _alglib_env_state;
314     alglib_impl::ae_state_init(&_alglib_env_state);
315     if( setjmp(_break_jump) )
316     {
317 #if !defined(AE_NO_EXCEPTIONS)
318         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
319 #else
320         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
321         return;
322 #endif
323     }
324     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
325     if( _xparams.flags!=0x0 )
326         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
327     alglib_impl::rmatrixcopy(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
328     alglib_impl::ae_state_clear(&_alglib_env_state);
329     return;
330 }
331 
332 /*************************************************************************
333 Performs generalized copy: B := Beta*B + Alpha*A.
334 
335 If Beta=0, then previous contents of B is simply ignored. If Alpha=0, then
336 A is ignored and not referenced. If both Alpha and Beta  are  zero,  B  is
337 filled by zeros.
338 
339 Input parameters:
340     M   -   number of rows
341     N   -   number of columns
342     Alpha-  coefficient
343     A   -   source matrix, MxN submatrix is copied and transposed
344     IA  -   submatrix offset (row index)
345     JA  -   submatrix offset (column index)
346     Beta-   coefficient
347     B   -   destination matrix, must be large enough to store result
348     IB  -   submatrix offset (row index)
349     JB  -   submatrix offset (column index)
350 *************************************************************************/
rmatrixgencopy(const ae_int_t m,const ae_int_t n,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const double beta,const real_2d_array & b,const ae_int_t ib,const ae_int_t jb,const xparams _xparams)351 void rmatrixgencopy(const ae_int_t m, const ae_int_t n, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const double beta, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const xparams _xparams)
352 {
353     jmp_buf _break_jump;
354     alglib_impl::ae_state _alglib_env_state;
355     alglib_impl::ae_state_init(&_alglib_env_state);
356     if( setjmp(_break_jump) )
357     {
358 #if !defined(AE_NO_EXCEPTIONS)
359         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
360 #else
361         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
362         return;
363 #endif
364     }
365     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
366     if( _xparams.flags!=0x0 )
367         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
368     alglib_impl::rmatrixgencopy(m, n, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, beta, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, &_alglib_env_state);
369     alglib_impl::ae_state_clear(&_alglib_env_state);
370     return;
371 }
372 
373 /*************************************************************************
374 Rank-1 correction: A := A + alpha*u*v'
375 
376 NOTE: this  function  expects  A  to  be  large enough to store result. No
377       automatic preallocation happens for  smaller  arrays.  No  integrity
378       checks is performed for sizes of A, u, v.
379 
380 INPUT PARAMETERS:
381     M   -   number of rows
382     N   -   number of columns
383     A   -   target matrix, MxN submatrix is updated
384     IA  -   submatrix offset (row index)
385     JA  -   submatrix offset (column index)
386     Alpha-  coefficient
387     U   -   vector #1
388     IU  -   subvector offset
389     V   -   vector #2
390     IV  -   subvector offset
391 
392 
393   -- ALGLIB routine --
394 
395      16.10.2017
396      Bochkanov Sergey
397 *************************************************************************/
rmatrixger(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const double alpha,const real_1d_array & u,const ae_int_t iu,const real_1d_array & v,const ae_int_t iv,const xparams _xparams)398 void rmatrixger(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const double alpha, const real_1d_array &u, const ae_int_t iu, const real_1d_array &v, const ae_int_t iv, const xparams _xparams)
399 {
400     jmp_buf _break_jump;
401     alglib_impl::ae_state _alglib_env_state;
402     alglib_impl::ae_state_init(&_alglib_env_state);
403     if( setjmp(_break_jump) )
404     {
405 #if !defined(AE_NO_EXCEPTIONS)
406         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
407 #else
408         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
409         return;
410 #endif
411     }
412     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
413     if( _xparams.flags!=0x0 )
414         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
415     alglib_impl::rmatrixger(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, alpha, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
416     alglib_impl::ae_state_clear(&_alglib_env_state);
417     return;
418 }
419 
420 /*************************************************************************
421 Rank-1 correction: A := A + u*v'
422 
423 INPUT PARAMETERS:
424     M   -   number of rows
425     N   -   number of columns
426     A   -   target matrix, MxN submatrix is updated
427     IA  -   submatrix offset (row index)
428     JA  -   submatrix offset (column index)
429     U   -   vector #1
430     IU  -   subvector offset
431     V   -   vector #2
432     IV  -   subvector offset
433 *************************************************************************/
cmatrixrank1(const ae_int_t m,const ae_int_t n,complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,complex_1d_array & u,const ae_int_t iu,complex_1d_array & v,const ae_int_t iv,const xparams _xparams)434 void cmatrixrank1(const ae_int_t m, const ae_int_t n, complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, complex_1d_array &u, const ae_int_t iu, complex_1d_array &v, const ae_int_t iv, const xparams _xparams)
435 {
436     jmp_buf _break_jump;
437     alglib_impl::ae_state _alglib_env_state;
438     alglib_impl::ae_state_init(&_alglib_env_state);
439     if( setjmp(_break_jump) )
440     {
441 #if !defined(AE_NO_EXCEPTIONS)
442         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
443 #else
444         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
445         return;
446 #endif
447     }
448     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
449     if( _xparams.flags!=0x0 )
450         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
451     alglib_impl::cmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
452     alglib_impl::ae_state_clear(&_alglib_env_state);
453     return;
454 }
455 
456 /*************************************************************************
457 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGER()
458            which is more generic version of this function.
459 
460 Rank-1 correction: A := A + u*v'
461 
462 INPUT PARAMETERS:
463     M   -   number of rows
464     N   -   number of columns
465     A   -   target matrix, MxN submatrix is updated
466     IA  -   submatrix offset (row index)
467     JA  -   submatrix offset (column index)
468     U   -   vector #1
469     IU  -   subvector offset
470     V   -   vector #2
471     IV  -   subvector offset
472 *************************************************************************/
rmatrixrank1(const ae_int_t m,const ae_int_t n,real_2d_array & a,const ae_int_t ia,const ae_int_t ja,real_1d_array & u,const ae_int_t iu,real_1d_array & v,const ae_int_t iv,const xparams _xparams)473 void rmatrixrank1(const ae_int_t m, const ae_int_t n, real_2d_array &a, const ae_int_t ia, const ae_int_t ja, real_1d_array &u, const ae_int_t iu, real_1d_array &v, const ae_int_t iv, const xparams _xparams)
474 {
475     jmp_buf _break_jump;
476     alglib_impl::ae_state _alglib_env_state;
477     alglib_impl::ae_state_init(&_alglib_env_state);
478     if( setjmp(_break_jump) )
479     {
480 #if !defined(AE_NO_EXCEPTIONS)
481         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
482 #else
483         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
484         return;
485 #endif
486     }
487     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
488     if( _xparams.flags!=0x0 )
489         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
490     alglib_impl::rmatrixrank1(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), iu, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), iv, &_alglib_env_state);
491     alglib_impl::ae_state_clear(&_alglib_env_state);
492     return;
493 }
494 
495 /*************************************************************************
496 
497 *************************************************************************/
rmatrixgemv(const ae_int_t m,const ae_int_t n,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t opa,const real_1d_array & x,const ae_int_t ix,const double beta,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)498 void rmatrixgemv(const ae_int_t m, const ae_int_t n, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, const double beta, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
499 {
500     jmp_buf _break_jump;
501     alglib_impl::ae_state _alglib_env_state;
502     alglib_impl::ae_state_init(&_alglib_env_state);
503     if( setjmp(_break_jump) )
504     {
505 #if !defined(AE_NO_EXCEPTIONS)
506         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
507 #else
508         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
509         return;
510 #endif
511     }
512     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
513     if( _xparams.flags!=0x0 )
514         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
515     alglib_impl::rmatrixgemv(m, n, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, beta, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
516     alglib_impl::ae_state_clear(&_alglib_env_state);
517     return;
518 }
519 
520 /*************************************************************************
521 Matrix-vector product: y := op(A)*x
522 
523 INPUT PARAMETERS:
524     M   -   number of rows of op(A)
525             M>=0
526     N   -   number of columns of op(A)
527             N>=0
528     A   -   target matrix
529     IA  -   submatrix offset (row index)
530     JA  -   submatrix offset (column index)
531     OpA -   operation type:
532             * OpA=0     =>  op(A) = A
533             * OpA=1     =>  op(A) = A^T
534             * OpA=2     =>  op(A) = A^H
535     X   -   input vector
536     IX  -   subvector offset
537     IY  -   subvector offset
538     Y   -   preallocated matrix, must be large enough to store result
539 
540 OUTPUT PARAMETERS:
541     Y   -   vector which stores result
542 
543 if M=0, then subroutine does nothing.
544 if N=0, Y is filled by zeros.
545 
546 
547   -- ALGLIB routine --
548 
549      28.01.2010
550      Bochkanov Sergey
551 *************************************************************************/
cmatrixmv(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t opa,const complex_1d_array & x,const ae_int_t ix,complex_1d_array & y,const ae_int_t iy,const xparams _xparams)552 void cmatrixmv(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const complex_1d_array &x, const ae_int_t ix, complex_1d_array &y, const ae_int_t iy, const xparams _xparams)
553 {
554     jmp_buf _break_jump;
555     alglib_impl::ae_state _alglib_env_state;
556     alglib_impl::ae_state_init(&_alglib_env_state);
557     if( setjmp(_break_jump) )
558     {
559 #if !defined(AE_NO_EXCEPTIONS)
560         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
561 #else
562         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
563         return;
564 #endif
565     }
566     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
567     if( _xparams.flags!=0x0 )
568         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
569     alglib_impl::cmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
570     alglib_impl::ae_state_clear(&_alglib_env_state);
571     return;
572 }
573 
574 /*************************************************************************
575 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGEMV()
576            which is more generic version of this function.
577 
578 Matrix-vector product: y := op(A)*x
579 
580 INPUT PARAMETERS:
581     M   -   number of rows of op(A)
582     N   -   number of columns of op(A)
583     A   -   target matrix
584     IA  -   submatrix offset (row index)
585     JA  -   submatrix offset (column index)
586     OpA -   operation type:
587             * OpA=0     =>  op(A) = A
588             * OpA=1     =>  op(A) = A^T
589     X   -   input vector
590     IX  -   subvector offset
591     IY  -   subvector offset
592     Y   -   preallocated matrix, must be large enough to store result
593 
594 OUTPUT PARAMETERS:
595     Y   -   vector which stores result
596 
597 if M=0, then subroutine does nothing.
598 if N=0, Y is filled by zeros.
599 
600 
601   -- ALGLIB routine --
602 
603      28.01.2010
604      Bochkanov Sergey
605 *************************************************************************/
rmatrixmv(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t opa,const real_1d_array & x,const ae_int_t ix,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)606 void rmatrixmv(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t opa, const real_1d_array &x, const ae_int_t ix, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
607 {
608     jmp_buf _break_jump;
609     alglib_impl::ae_state _alglib_env_state;
610     alglib_impl::ae_state_init(&_alglib_env_state);
611     if( setjmp(_break_jump) )
612     {
613 #if !defined(AE_NO_EXCEPTIONS)
614         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
615 #else
616         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
617         return;
618 #endif
619     }
620     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
621     if( _xparams.flags!=0x0 )
622         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
623     alglib_impl::rmatrixmv(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, opa, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
624     alglib_impl::ae_state_clear(&_alglib_env_state);
625     return;
626 }
627 
628 /*************************************************************************
629 
630 *************************************************************************/
rmatrixsymv(const ae_int_t n,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const bool isupper,const real_1d_array & x,const ae_int_t ix,const double beta,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)631 void rmatrixsymv(const ae_int_t n, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const bool isupper, const real_1d_array &x, const ae_int_t ix, const double beta, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
632 {
633     jmp_buf _break_jump;
634     alglib_impl::ae_state _alglib_env_state;
635     alglib_impl::ae_state_init(&_alglib_env_state);
636     if( setjmp(_break_jump) )
637     {
638 #if !defined(AE_NO_EXCEPTIONS)
639         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
640 #else
641         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
642         return;
643 #endif
644     }
645     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
646     if( _xparams.flags!=0x0 )
647         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
648     alglib_impl::rmatrixsymv(n, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, beta, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
649     alglib_impl::ae_state_clear(&_alglib_env_state);
650     return;
651 }
652 
653 /*************************************************************************
654 
655 *************************************************************************/
rmatrixsyvmv(const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const bool isupper,const real_1d_array & x,const ae_int_t ix,const real_1d_array & tmp,const xparams _xparams)656 double rmatrixsyvmv(const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const bool isupper, const real_1d_array &x, const ae_int_t ix, const real_1d_array &tmp, const xparams _xparams)
657 {
658     jmp_buf _break_jump;
659     alglib_impl::ae_state _alglib_env_state;
660     alglib_impl::ae_state_init(&_alglib_env_state);
661     if( setjmp(_break_jump) )
662     {
663 #if !defined(AE_NO_EXCEPTIONS)
664         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
665 #else
666         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
667         return 0;
668 #endif
669     }
670     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
671     if( _xparams.flags!=0x0 )
672         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
673     double result = alglib_impl::rmatrixsyvmv(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, const_cast<alglib_impl::ae_vector*>(tmp.c_ptr()), &_alglib_env_state);
674     alglib_impl::ae_state_clear(&_alglib_env_state);
675     return *(reinterpret_cast<double*>(&result));
676 }
677 
678 /*************************************************************************
679 This subroutine solves linear system op(A)*x=b where:
680 * A is NxN upper/lower triangular/unitriangular matrix
681 * X and B are Nx1 vectors
682 * "op" may be identity transformation or transposition
683 
684 Solution replaces X.
685 
IMPORTANT: * no overflow/underflow/degeneracy tests are performed.
687            * no integrity checks for operand sizes, out-of-bounds accesses
688              and so on is performed
689 
690 INPUT PARAMETERS
691     N   -   matrix size, N>=0
692     A       -   matrix, actial matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
693     IA      -   submatrix offset
694     JA      -   submatrix offset
695     IsUpper -   whether matrix is upper triangular
696     IsUnit  -   whether matrix is unitriangular
697     OpType  -   transformation type:
698                 * 0 - no transformation
699                 * 1 - transposition
700     X       -   right part, actual vector is stored in X[IX:IX+N-1]
701     IX      -   offset
702 
703 OUTPUT PARAMETERS
704     X       -   solution replaces elements X[IX:IX+N-1]
705 
706   -- ALGLIB routine / remastering of LAPACK's DTRSV --
707      (c) 2017 Bochkanov Sergey - converted to ALGLIB
708      (c) 2016 Reference BLAS level1 routine (LAPACK version 3.7.0)
709      Reference BLAS is a software package provided by Univ. of Tennessee,
710      Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.
711 *************************************************************************/
rmatrixtrsv(const ae_int_t n,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const bool isupper,const bool isunit,const ae_int_t optype,const real_1d_array & x,const ae_int_t ix,const xparams _xparams)712 void rmatrixtrsv(const ae_int_t n, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const bool isupper, const bool isunit, const ae_int_t optype, const real_1d_array &x, const ae_int_t ix, const xparams _xparams)
713 {
714     jmp_buf _break_jump;
715     alglib_impl::ae_state _alglib_env_state;
716     alglib_impl::ae_state_init(&_alglib_env_state);
717     if( setjmp(_break_jump) )
718     {
719 #if !defined(AE_NO_EXCEPTIONS)
720         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
721 #else
722         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
723         return;
724 #endif
725     }
726     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
727     if( _xparams.flags!=0x0 )
728         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
729     alglib_impl::rmatrixtrsv(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, isupper, isunit, optype, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, &_alglib_env_state);
730     alglib_impl::ae_state_clear(&_alglib_env_state);
731     return;
732 }
733 
734 /*************************************************************************
735 This subroutine calculates X*op(A^-1) where:
736 * X is MxN general matrix
737 * A is NxN upper/lower triangular/unitriangular matrix
738 * "op" may be identity transformation, transposition, conjugate transposition
739 Multiplication result replaces X.
740 
741 INPUT PARAMETERS
742     N   -   matrix size, N>=0
743     M   -   matrix size, N>=0
744     A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
745     I1      -   submatrix offset
746     J1      -   submatrix offset
747     IsUpper -   whether matrix is upper triangular
748     IsUnit  -   whether matrix is unitriangular
749     OpType  -   transformation type:
750                 * 0 - no transformation
751                 * 1 - transposition
752                 * 2 - conjugate transposition
753     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
754     I2  -   submatrix offset
755     J2  -   submatrix offset
756 
757   ! FREE EDITION OF ALGLIB:
758   !
759   ! Free Edition of ALGLIB supports following important features for  this
760   ! function:
761   ! * C++ version: x64 SIMD support using C++ intrinsics
762   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
763   !
764   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
765   ! Reference Manual in order  to  find  out  how to activate SIMD support
766   ! in ALGLIB.
767 
768   ! COMMERCIAL EDITION OF ALGLIB:
769   !
770   ! Commercial Edition of ALGLIB includes following important improvements
771   ! of this function:
772   ! * high-performance native backend with same C# interface (C# version)
773   ! * multithreading support (C++ and C# versions)
774   ! * hardware vendor (Intel) implementations of linear algebra primitives
775   !   (C++ and C# versions, x86/x64 platform)
776   !
777   ! We recommend you to read 'Working with commercial version' section  of
778   ! ALGLIB Reference Manual in order to find out how to  use  performance-
779   ! related features provided by commercial edition of ALGLIB.
780 
781   -- ALGLIB routine --
782      20.01.2018
783      Bochkanov Sergey
784 *************************************************************************/
cmatrixrighttrsm(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const complex_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)785 void cmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const complex_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
786 {
787     jmp_buf _break_jump;
788     alglib_impl::ae_state _alglib_env_state;
789     alglib_impl::ae_state_init(&_alglib_env_state);
790     if( setjmp(_break_jump) )
791     {
792 #if !defined(AE_NO_EXCEPTIONS)
793         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
794 #else
795         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
796         return;
797 #endif
798     }
799     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
800     if( _xparams.flags!=0x0 )
801         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
802     alglib_impl::cmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
803     alglib_impl::ae_state_clear(&_alglib_env_state);
804     return;
805 }
806 
807 /*************************************************************************
808 This subroutine calculates op(A^-1)*X where:
809 * X is MxN general matrix
810 * A is MxM upper/lower triangular/unitriangular matrix
811 * "op" may be identity transformation, transposition, conjugate transposition
812 Multiplication result replaces X.
813 
814 INPUT PARAMETERS
815     N   -   matrix size, N>=0
816     M   -   matrix size, N>=0
817     A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
818     I1      -   submatrix offset
819     J1      -   submatrix offset
820     IsUpper -   whether matrix is upper triangular
821     IsUnit  -   whether matrix is unitriangular
822     OpType  -   transformation type:
823                 * 0 - no transformation
824                 * 1 - transposition
825                 * 2 - conjugate transposition
826     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
827     I2  -   submatrix offset
828     J2  -   submatrix offset
829 
830   ! FREE EDITION OF ALGLIB:
831   !
832   ! Free Edition of ALGLIB supports following important features for  this
833   ! function:
834   ! * C++ version: x64 SIMD support using C++ intrinsics
835   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
836   !
837   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
838   ! Reference Manual in order  to  find  out  how to activate SIMD support
839   ! in ALGLIB.
840 
841   ! COMMERCIAL EDITION OF ALGLIB:
842   !
843   ! Commercial Edition of ALGLIB includes following important improvements
844   ! of this function:
845   ! * high-performance native backend with same C# interface (C# version)
846   ! * multithreading support (C++ and C# versions)
847   ! * hardware vendor (Intel) implementations of linear algebra primitives
848   !   (C++ and C# versions, x86/x64 platform)
849   !
850   ! We recommend you to read 'Working with commercial version' section  of
851   ! ALGLIB Reference Manual in order to find out how to  use  performance-
852   ! related features provided by commercial edition of ALGLIB.
853 
854   -- ALGLIB routine --
855      15.12.2009-22.01.2018
856      Bochkanov Sergey
857 *************************************************************************/
cmatrixlefttrsm(const ae_int_t m,const ae_int_t n,const complex_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const complex_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)858 void cmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const complex_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const complex_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
859 {
860     jmp_buf _break_jump;
861     alglib_impl::ae_state _alglib_env_state;
862     alglib_impl::ae_state_init(&_alglib_env_state);
863     if( setjmp(_break_jump) )
864     {
865 #if !defined(AE_NO_EXCEPTIONS)
866         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
867 #else
868         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
869         return;
870 #endif
871     }
872     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
873     if( _xparams.flags!=0x0 )
874         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
875     alglib_impl::cmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
876     alglib_impl::ae_state_clear(&_alglib_env_state);
877     return;
878 }
879 
880 /*************************************************************************
881 This subroutine calculates X*op(A^-1) where:
882 * X is MxN general matrix
883 * A is NxN upper/lower triangular/unitriangular matrix
884 * "op" may be identity transformation, transposition
885 Multiplication result replaces X.
886 
887 INPUT PARAMETERS
888     N   -   matrix size, N>=0
889     M   -   matrix size, N>=0
890     A       -   matrix, actial matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
891     I1      -   submatrix offset
892     J1      -   submatrix offset
893     IsUpper -   whether matrix is upper triangular
894     IsUnit  -   whether matrix is unitriangular
895     OpType  -   transformation type:
896                 * 0 - no transformation
897                 * 1 - transposition
898     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
899     I2  -   submatrix offset
900     J2  -   submatrix offset
901 
902   ! FREE EDITION OF ALGLIB:
903   !
904   ! Free Edition of ALGLIB supports following important features for  this
905   ! function:
906   ! * C++ version: x64 SIMD support using C++ intrinsics
907   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
908   !
909   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
910   ! Reference Manual in order  to  find  out  how to activate SIMD support
911   ! in ALGLIB.
912 
913   ! COMMERCIAL EDITION OF ALGLIB:
914   !
915   ! Commercial Edition of ALGLIB includes following important improvements
916   ! of this function:
917   ! * high-performance native backend with same C# interface (C# version)
918   ! * multithreading support (C++ and C# versions)
919   ! * hardware vendor (Intel) implementations of linear algebra primitives
920   !   (C++ and C# versions, x86/x64 platform)
921   !
922   ! We recommend you to read 'Working with commercial version' section  of
923   ! ALGLIB Reference Manual in order to find out how to  use  performance-
924   ! related features provided by commercial edition of ALGLIB.
925 
926   -- ALGLIB routine --
927      15.12.2009-22.01.2018
928      Bochkanov Sergey
929 *************************************************************************/
rmatrixrighttrsm(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const real_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)930 void rmatrixrighttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const real_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
931 {
932     jmp_buf _break_jump;
933     alglib_impl::ae_state _alglib_env_state;
934     alglib_impl::ae_state_init(&_alglib_env_state);
935     if( setjmp(_break_jump) )
936     {
937 #if !defined(AE_NO_EXCEPTIONS)
938         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
939 #else
940         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
941         return;
942 #endif
943     }
944     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
945     if( _xparams.flags!=0x0 )
946         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
947     alglib_impl::rmatrixrighttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
948     alglib_impl::ae_state_clear(&_alglib_env_state);
949     return;
950 }
951 
952 /*************************************************************************
953 This subroutine calculates op(A^-1)*X where:
954 * X is MxN general matrix
955 * A is MxM upper/lower triangular/unitriangular matrix
956 * "op" may be identity transformation, transposition
957 Multiplication result replaces X.
958 
959 INPUT PARAMETERS
960     N   -   matrix size, N>=0
961     M   -   matrix size, N>=0
962     A       -   matrix, actial matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
963     I1      -   submatrix offset
964     J1      -   submatrix offset
965     IsUpper -   whether matrix is upper triangular
966     IsUnit  -   whether matrix is unitriangular
967     OpType  -   transformation type:
968                 * 0 - no transformation
969                 * 1 - transposition
970     X   -   matrix, actial matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
971     I2  -   submatrix offset
972     J2  -   submatrix offset
973 
974   ! FREE EDITION OF ALGLIB:
975   !
976   ! Free Edition of ALGLIB supports following important features for  this
977   ! function:
978   ! * C++ version: x64 SIMD support using C++ intrinsics
979   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
980   !
981   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
982   ! Reference Manual in order  to  find  out  how to activate SIMD support
983   ! in ALGLIB.
984 
985   ! COMMERCIAL EDITION OF ALGLIB:
986   !
987   ! Commercial Edition of ALGLIB includes following important improvements
988   ! of this function:
989   ! * high-performance native backend with same C# interface (C# version)
990   ! * multithreading support (C++ and C# versions)
991   ! * hardware vendor (Intel) implementations of linear algebra primitives
992   !   (C++ and C# versions, x86/x64 platform)
993   !
994   ! We recommend you to read 'Working with commercial version' section  of
995   ! ALGLIB Reference Manual in order to find out how to  use  performance-
996   ! related features provided by commercial edition of ALGLIB.
997 
998   -- ALGLIB routine --
999      15.12.2009-22.01.2018
1000      Bochkanov Sergey
1001 *************************************************************************/
rmatrixlefttrsm(const ae_int_t m,const ae_int_t n,const real_2d_array & a,const ae_int_t i1,const ae_int_t j1,const bool isupper,const bool isunit,const ae_int_t optype,const real_2d_array & x,const ae_int_t i2,const ae_int_t j2,const xparams _xparams)1002 void rmatrixlefttrsm(const ae_int_t m, const ae_int_t n, const real_2d_array &a, const ae_int_t i1, const ae_int_t j1, const bool isupper, const bool isunit, const ae_int_t optype, const real_2d_array &x, const ae_int_t i2, const ae_int_t j2, const xparams _xparams)
1003 {
1004     jmp_buf _break_jump;
1005     alglib_impl::ae_state _alglib_env_state;
1006     alglib_impl::ae_state_init(&_alglib_env_state);
1007     if( setjmp(_break_jump) )
1008     {
1009 #if !defined(AE_NO_EXCEPTIONS)
1010         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1011 #else
1012         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1013         return;
1014 #endif
1015     }
1016     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1017     if( _xparams.flags!=0x0 )
1018         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1019     alglib_impl::rmatrixlefttrsm(m, n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), i1, j1, isupper, isunit, optype, const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), i2, j2, &_alglib_env_state);
1020     alglib_impl::ae_state_clear(&_alglib_env_state);
1021     return;
1022 }
1023 
1024 /*************************************************************************
1025 This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
1026 where:
1027 * C is NxN Hermitian matrix given by its upper/lower triangle
1028 * A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
1029 
1030 Additional info:
1031 * multiplication result replaces C. If Beta=0, C elements are not used in
1032   calculations (not multiplied by zero - just not referenced)
1033 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1034 * if both Beta and Alpha are zero, C is filled by zeros.
1035 
1036 INPUT PARAMETERS
1037     N       -   matrix size, N>=0
1038     K       -   matrix size, K>=0
1039     Alpha   -   coefficient
1040     A       -   matrix
1041     IA      -   submatrix offset (row index)
1042     JA      -   submatrix offset (column index)
1043     OpTypeA -   multiplication type:
1044                 * 0 - A*A^H is calculated
1045                 * 2 - A^H*A is calculated
1046     Beta    -   coefficient
1047     C       -   preallocated input/output matrix
1048     IC      -   submatrix offset (row index)
1049     JC      -   submatrix offset (column index)
1050     IsUpper -   whether upper or lower triangle of C is updated;
1051                 this function updates only one half of C, leaving
1052                 other half unchanged (not referenced at all).
1053 
1054   ! FREE EDITION OF ALGLIB:
1055   !
1056   ! Free Edition of ALGLIB supports following important features for  this
1057   ! function:
1058   ! * C++ version: x64 SIMD support using C++ intrinsics
1059   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1060   !
1061   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1062   ! Reference Manual in order  to  find  out  how to activate SIMD support
1063   ! in ALGLIB.
1064 
1065   ! COMMERCIAL EDITION OF ALGLIB:
1066   !
1067   ! Commercial Edition of ALGLIB includes following important improvements
1068   ! of this function:
1069   ! * high-performance native backend with same C# interface (C# version)
1070   ! * multithreading support (C++ and C# versions)
1071   ! * hardware vendor (Intel) implementations of linear algebra primitives
1072   !   (C++ and C# versions, x86/x64 platform)
1073   !
1074   ! We recommend you to read 'Working with commercial version' section  of
1075   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1076   ! related features provided by commercial edition of ALGLIB.
1077 
1078   -- ALGLIB routine --
1079      16.12.2009-22.01.2018
1080      Bochkanov Sergey
1081 *************************************************************************/
cmatrixherk(const ae_int_t n,const ae_int_t k,const double alpha,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const double beta,const complex_2d_array & c,const ae_int_t ic,const ae_int_t jc,const bool isupper,const xparams _xparams)1082 void cmatrixherk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, const complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper, const xparams _xparams)
1083 {
1084     jmp_buf _break_jump;
1085     alglib_impl::ae_state _alglib_env_state;
1086     alglib_impl::ae_state_init(&_alglib_env_state);
1087     if( setjmp(_break_jump) )
1088     {
1089 #if !defined(AE_NO_EXCEPTIONS)
1090         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1091 #else
1092         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1093         return;
1094 #endif
1095     }
1096     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1097     if( _xparams.flags!=0x0 )
1098         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1099     alglib_impl::cmatrixherk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
1100     alglib_impl::ae_state_clear(&_alglib_env_state);
1101     return;
1102 }
1103 
1104 /*************************************************************************
1105 This subroutine calculates  C=alpha*A*A^T+beta*C  or  C=alpha*A^T*A+beta*C
1106 where:
1107 * C is NxN symmetric matrix given by its upper/lower triangle
1108 * A is NxK matrix when A*A^T is calculated, KxN matrix otherwise
1109 
1110 Additional info:
1111 * multiplication result replaces C. If Beta=0, C elements are not used in
1112   calculations (not multiplied by zero - just not referenced)
1113 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1114 * if both Beta and Alpha are zero, C is filled by zeros.
1115 
1116 INPUT PARAMETERS
1117     N       -   matrix size, N>=0
1118     K       -   matrix size, K>=0
1119     Alpha   -   coefficient
1120     A       -   matrix
1121     IA      -   submatrix offset (row index)
1122     JA      -   submatrix offset (column index)
1123     OpTypeA -   multiplication type:
1124                 * 0 - A*A^T is calculated
1125                 * 2 - A^T*A is calculated
1126     Beta    -   coefficient
1127     C       -   preallocated input/output matrix
1128     IC      -   submatrix offset (row index)
1129     JC      -   submatrix offset (column index)
1130     IsUpper -   whether C is upper triangular or lower triangular
1131 
1132   ! FREE EDITION OF ALGLIB:
1133   !
1134   ! Free Edition of ALGLIB supports following important features for  this
1135   ! function:
1136   ! * C++ version: x64 SIMD support using C++ intrinsics
1137   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1138   !
1139   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1140   ! Reference Manual in order  to  find  out  how to activate SIMD support
1141   ! in ALGLIB.
1142 
1143   ! COMMERCIAL EDITION OF ALGLIB:
1144   !
1145   ! Commercial Edition of ALGLIB includes following important improvements
1146   ! of this function:
1147   ! * high-performance native backend with same C# interface (C# version)
1148   ! * multithreading support (C++ and C# versions)
1149   ! * hardware vendor (Intel) implementations of linear algebra primitives
1150   !   (C++ and C# versions, x86/x64 platform)
1151   !
1152   ! We recommend you to read 'Working with commercial version' section  of
1153   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1154   ! related features provided by commercial edition of ALGLIB.
1155 
1156   -- ALGLIB routine --
1157      16.12.2009-22.01.2018
1158      Bochkanov Sergey
1159 *************************************************************************/
rmatrixsyrk(const ae_int_t n,const ae_int_t k,const double alpha,const real_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const double beta,const real_2d_array & c,const ae_int_t ic,const ae_int_t jc,const bool isupper,const xparams _xparams)1160 void rmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, const real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper, const xparams _xparams)
1161 {
1162     jmp_buf _break_jump;
1163     alglib_impl::ae_state _alglib_env_state;
1164     alglib_impl::ae_state_init(&_alglib_env_state);
1165     if( setjmp(_break_jump) )
1166     {
1167 #if !defined(AE_NO_EXCEPTIONS)
1168         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1169 #else
1170         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1171         return;
1172 #endif
1173     }
1174     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1175     if( _xparams.flags!=0x0 )
1176         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1177     alglib_impl::rmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
1178     alglib_impl::ae_state_clear(&_alglib_env_state);
1179     return;
1180 }
1181 
1182 /*************************************************************************
1183 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
1184 * C is MxN general matrix
1185 * op1(A) is MxK matrix
1186 * op2(B) is KxN matrix
1187 * "op" may be identity transformation, transposition, conjugate transposition
1188 
1189 Additional info:
1190 * cache-oblivious algorithm is used.
1191 * multiplication result replaces C. If Beta=0, C elements are not used in
1192   calculations (not multiplied by zero - just not referenced)
1193 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1194 * if both Beta and Alpha are zero, C is filled by zeros.
1195 
1196 IMPORTANT:
1197 
1198 This function does NOT preallocate output matrix C, it MUST be preallocated
1199 by caller prior to calling this function. In case C does not have  enough
1200 space to store result, exception will be generated.
1201 
1202 INPUT PARAMETERS
1203     M       -   matrix size, M>0
1204     N       -   matrix size, N>0
1205     K       -   matrix size, K>0
1206     Alpha   -   coefficient
1207     A       -   matrix
1208     IA      -   submatrix offset
1209     JA      -   submatrix offset
1210     OpTypeA -   transformation type:
1211                 * 0 - no transformation
1212                 * 1 - transposition
1213                 * 2 - conjugate transposition
1214     B       -   matrix
1215     IB      -   submatrix offset
1216     JB      -   submatrix offset
1217     OpTypeB -   transformation type:
1218                 * 0 - no transformation
1219                 * 1 - transposition
1220                 * 2 - conjugate transposition
1221     Beta    -   coefficient
1222     C       -   matrix (PREALLOCATED, large enough to store result)
1223     IC      -   submatrix offset
1224     JC      -   submatrix offset
1225 
1226   ! FREE EDITION OF ALGLIB:
1227   !
1228   ! Free Edition of ALGLIB supports following important features for  this
1229   ! function:
1230   ! * C++ version: x64 SIMD support using C++ intrinsics
1231   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1232   !
1233   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1234   ! Reference Manual in order  to  find  out  how to activate SIMD support
1235   ! in ALGLIB.
1236 
1237   ! COMMERCIAL EDITION OF ALGLIB:
1238   !
1239   ! Commercial Edition of ALGLIB includes following important improvements
1240   ! of this function:
1241   ! * high-performance native backend with same C# interface (C# version)
1242   ! * multithreading support (C++ and C# versions)
1243   ! * hardware vendor (Intel) implementations of linear algebra primitives
1244   !   (C++ and C# versions, x86/x64 platform)
1245   !
1246   ! We recommend you to read 'Working with commercial version' section  of
1247   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1248   ! related features provided by commercial edition of ALGLIB.
1249 
1250   -- ALGLIB routine --
1251      2009-2019
1252      Bochkanov Sergey
1253 *************************************************************************/
cmatrixgemm(const ae_int_t m,const ae_int_t n,const ae_int_t k,const alglib::complex alpha,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const complex_2d_array & b,const ae_int_t ib,const ae_int_t jb,const ae_int_t optypeb,const alglib::complex beta,const complex_2d_array & c,const ae_int_t ic,const ae_int_t jc,const xparams _xparams)1254 void cmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const alglib::complex alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const complex_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const alglib::complex beta, const complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const xparams _xparams)
1255 {
1256     jmp_buf _break_jump;
1257     alglib_impl::ae_state _alglib_env_state;
1258     alglib_impl::ae_state_init(&_alglib_env_state);
1259     if( setjmp(_break_jump) )
1260     {
1261 #if !defined(AE_NO_EXCEPTIONS)
1262         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1263 #else
1264         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1265         return;
1266 #endif
1267     }
1268     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1269     if( _xparams.flags!=0x0 )
1270         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1271     alglib_impl::cmatrixgemm(m, n, k, *alpha.c_ptr(), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, *beta.c_ptr(), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &_alglib_env_state);
1272     alglib_impl::ae_state_clear(&_alglib_env_state);
1273     return;
1274 }
1275 
1276 /*************************************************************************
1277 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
1278 * C is MxN general matrix
1279 * op1(A) is MxK matrix
1280 * op2(B) is KxN matrix
1281 * "op" may be identity transformation, transposition
1282 
1283 Additional info:
1284 * cache-oblivious algorithm is used.
1285 * multiplication result replaces C. If Beta=0, C elements are not used in
1286   calculations (not multiplied by zero - just not referenced)
1287 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
1288 * if both Beta and Alpha are zero, C is filled by zeros.
1289 
1290 IMPORTANT:
1291 
1292 This function does NOT preallocate output matrix C, it MUST be preallocated
1293 by caller prior to calling this function. In case C does not have  enough
1294 space to store result, exception will be generated.
1295 
1296 INPUT PARAMETERS
1297     M       -   matrix size, M>0
1298     N       -   matrix size, N>0
1299     K       -   matrix size, K>0
1300     Alpha   -   coefficient
1301     A       -   matrix
1302     IA      -   submatrix offset
1303     JA      -   submatrix offset
1304     OpTypeA -   transformation type:
1305                 * 0 - no transformation
1306                 * 1 - transposition
1307     B       -   matrix
1308     IB      -   submatrix offset
1309     JB      -   submatrix offset
1310     OpTypeB -   transformation type:
1311                 * 0 - no transformation
1312                 * 1 - transposition
1313     Beta    -   coefficient
1314     C       -   PREALLOCATED output matrix, large enough to store result
1315     IC      -   submatrix offset
1316     JC      -   submatrix offset
1317 
1318   ! FREE EDITION OF ALGLIB:
1319   !
1320   ! Free Edition of ALGLIB supports following important features for  this
1321   ! function:
1322   ! * C++ version: x64 SIMD support using C++ intrinsics
1323   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1324   !
1325   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1326   ! Reference Manual in order  to  find  out  how to activate SIMD support
1327   ! in ALGLIB.
1328 
1329   ! COMMERCIAL EDITION OF ALGLIB:
1330   !
1331   ! Commercial Edition of ALGLIB includes following important improvements
1332   ! of this function:
1333   ! * high-performance native backend with same C# interface (C# version)
1334   ! * multithreading support (C++ and C# versions)
1335   ! * hardware vendor (Intel) implementations of linear algebra primitives
1336   !   (C++ and C# versions, x86/x64 platform)
1337   !
1338   ! We recommend you to read 'Working with commercial version' section  of
1339   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1340   ! related features provided by commercial edition of ALGLIB.
1341 
1342   -- ALGLIB routine --
1343      2009-2019
1344      Bochkanov Sergey
1345 *************************************************************************/
void rmatrixgemm(const ae_int_t m, const ae_int_t n, const ae_int_t k, const double alpha, const real_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const real_2d_array &b, const ae_int_t ib, const ae_int_t jb, const ae_int_t optypeb, const double beta, const real_2d_array &c, const ae_int_t ic, const ae_int_t jc, const xparams _xparams)
{
    // Thin C++ facade over the computational core: create an ALGLIB
    // environment, arm the longjmp-based error trap (translated into a C++
    // exception, or an error flag when exceptions are disabled), forward
    // the GEMM request, then tear the environment down.
    jmp_buf trap;
    alglib_impl::ae_state env;
    alglib_impl::ae_state_init(&env);
    if (setjmp(trap))
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(env.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(env.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&env, &trap);
    if (_xparams.flags != 0x0)
        ae_state_set_flags(&env, _xparams.flags);
    alglib_impl::rmatrixgemm(m, n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), ib, jb, optypeb, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, &env);
    alglib_impl::ae_state_clear(&env);
}
1367 
1368 /*************************************************************************
1369 This subroutine is an older version of CMatrixHERK(), one with wrong  name
1370 (it is HErmitian update, not SYmmetric). It  is  left  here  for  backward
1371 compatibility.
1372 
1373   -- ALGLIB routine --
1374      16.12.2009
1375      Bochkanov Sergey
1376 *************************************************************************/
cmatrixsyrk(const ae_int_t n,const ae_int_t k,const double alpha,const complex_2d_array & a,const ae_int_t ia,const ae_int_t ja,const ae_int_t optypea,const double beta,const complex_2d_array & c,const ae_int_t ic,const ae_int_t jc,const bool isupper,const xparams _xparams)1377 void cmatrixsyrk(const ae_int_t n, const ae_int_t k, const double alpha, const complex_2d_array &a, const ae_int_t ia, const ae_int_t ja, const ae_int_t optypea, const double beta, const complex_2d_array &c, const ae_int_t ic, const ae_int_t jc, const bool isupper, const xparams _xparams)
1378 {
1379     jmp_buf _break_jump;
1380     alglib_impl::ae_state _alglib_env_state;
1381     alglib_impl::ae_state_init(&_alglib_env_state);
1382     if( setjmp(_break_jump) )
1383     {
1384 #if !defined(AE_NO_EXCEPTIONS)
1385         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1386 #else
1387         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1388         return;
1389 #endif
1390     }
1391     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1392     if( _xparams.flags!=0x0 )
1393         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1394     alglib_impl::cmatrixsyrk(n, k, alpha, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), ia, ja, optypea, beta, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ic, jc, isupper, &_alglib_env_state);
1395     alglib_impl::ae_state_clear(&_alglib_env_state);
1396     return;
1397 }
1398 #endif
1399 
1400 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
1401 /*************************************************************************
1402 QR decomposition of a rectangular matrix of size MxN
1403 
1404 Input parameters:
1405     A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
1406     M   -   number of rows in matrix A.
1407     N   -   number of columns in matrix A.
1408 
1409 Output parameters:
1410     A   -   matrices Q and R in compact form (see below).
1411     Tau -   array of scalar factors which are used to form
1412             matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
1413 
1414 Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
1415 MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
1416 
1417 The elements of matrix R are located on and above the main diagonal of
1418 matrix A. The elements which are located in Tau array and below the main
1419 diagonal of matrix A are used to form matrix Q as follows:
1420 
1421 Matrix Q is represented as a product of elementary reflections
1422 
1423 Q = H(0)*H(1)*...*H(k-1),
1424 
1425 where k = min(m,n), and each H(i) is in the form
1426 
1427 H(i) = 1 - tau * v * (v^T)
1428 
1429 where tau is a scalar stored in Tau[I]; v - real vector,
1430 so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
1431 
1432   ! FREE EDITION OF ALGLIB:
1433   !
1434   ! Free Edition of ALGLIB supports following important features for  this
1435   ! function:
1436   ! * C++ version: x64 SIMD support using C++ intrinsics
1437   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1438   !
1439   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1440   ! Reference Manual in order  to  find  out  how to activate SIMD support
1441   ! in ALGLIB.
1442 
1443   ! COMMERCIAL EDITION OF ALGLIB:
1444   !
1445   ! Commercial Edition of ALGLIB includes following important improvements
1446   ! of this function:
1447   ! * high-performance native backend with same C# interface (C# version)
1448   ! * multithreading support (C++ and C# versions)
1449   ! * hardware vendor (Intel) implementations of linear algebra primitives
1450   !   (C++ and C# versions, x86/x64 platform)
1451   !
1452   ! We recommend you to read 'Working with commercial version' section  of
1453   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1454   ! related features provided by commercial edition of ALGLIB.
1455 
1456   -- ALGLIB routine --
1457      17.02.2010
1458      Bochkanov Sergey
1459 *************************************************************************/
rmatrixqr(real_2d_array & a,const ae_int_t m,const ae_int_t n,real_1d_array & tau,const xparams _xparams)1460 void rmatrixqr(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau, const xparams _xparams)
1461 {
1462     jmp_buf _break_jump;
1463     alglib_impl::ae_state _alglib_env_state;
1464     alglib_impl::ae_state_init(&_alglib_env_state);
1465     if( setjmp(_break_jump) )
1466     {
1467 #if !defined(AE_NO_EXCEPTIONS)
1468         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1469 #else
1470         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1471         return;
1472 #endif
1473     }
1474     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1475     if( _xparams.flags!=0x0 )
1476         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1477     alglib_impl::rmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1478     alglib_impl::ae_state_clear(&_alglib_env_state);
1479     return;
1480 }
1481 
1482 /*************************************************************************
1483 LQ decomposition of a rectangular matrix of size MxN
1484 
1485 Input parameters:
1486     A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
1487     M   -   number of rows in matrix A.
1488     N   -   number of columns in matrix A.
1489 
1490 Output parameters:
1491     A   -   matrices L and Q in compact form (see below)
1492     Tau -   array of scalar factors which are used to form
1493             matrix Q. Array whose index ranges within [0..Min(M,N)-1].
1494 
1495 Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
1496 MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.
1497 
1498 The elements of matrix L are located on and below  the  main  diagonal  of
1499 matrix A. The elements which are located in Tau array and above  the  main
1500 diagonal of matrix A are used to form matrix Q as follows:
1501 
1502 Matrix Q is represented as a product of elementary reflections
1503 
1504 Q = H(k-1)*H(k-2)*...*H(1)*H(0),
1505 
1506 where k = min(m,n), and each H(i) is of the form
1507 
1508 H(i) = 1 - tau * v * (v^T)
1509 
1510 where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
1511 v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).
1512 
1513   ! FREE EDITION OF ALGLIB:
1514   !
1515   ! Free Edition of ALGLIB supports following important features for  this
1516   ! function:
1517   ! * C++ version: x64 SIMD support using C++ intrinsics
1518   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1519   !
1520   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1521   ! Reference Manual in order  to  find  out  how to activate SIMD support
1522   ! in ALGLIB.
1523 
1524   ! COMMERCIAL EDITION OF ALGLIB:
1525   !
1526   ! Commercial Edition of ALGLIB includes following important improvements
1527   ! of this function:
1528   ! * high-performance native backend with same C# interface (C# version)
1529   ! * multithreading support (C++ and C# versions)
1530   ! * hardware vendor (Intel) implementations of linear algebra primitives
1531   !   (C++ and C# versions, x86/x64 platform)
1532   !
1533   ! We recommend you to read 'Working with commercial version' section  of
1534   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1535   ! related features provided by commercial edition of ALGLIB.
1536 
1537   -- ALGLIB routine --
1538      17.02.2010
1539      Bochkanov Sergey
1540 *************************************************************************/
rmatrixlq(real_2d_array & a,const ae_int_t m,const ae_int_t n,real_1d_array & tau,const xparams _xparams)1541 void rmatrixlq(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tau, const xparams _xparams)
1542 {
1543     jmp_buf _break_jump;
1544     alglib_impl::ae_state _alglib_env_state;
1545     alglib_impl::ae_state_init(&_alglib_env_state);
1546     if( setjmp(_break_jump) )
1547     {
1548 #if !defined(AE_NO_EXCEPTIONS)
1549         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1550 #else
1551         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1552         return;
1553 #endif
1554     }
1555     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1556     if( _xparams.flags!=0x0 )
1557         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1558     alglib_impl::rmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1559     alglib_impl::ae_state_clear(&_alglib_env_state);
1560     return;
1561 }
1562 
1563 /*************************************************************************
1564 QR decomposition of a rectangular complex matrix of size MxN
1565 
1566 Input parameters:
1567     A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
1568     M   -   number of rows in matrix A.
1569     N   -   number of columns in matrix A.
1570 
1571 Output parameters:
1572     A   -   matrices Q and R in compact form
1573     Tau -   array of scalar factors which are used to form matrix Q. Array
1574             whose indexes range within [0.. Min(M,N)-1]
1575 
1576 Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
1577 MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.
1578 
1579   ! FREE EDITION OF ALGLIB:
1580   !
1581   ! Free Edition of ALGLIB supports following important features for  this
1582   ! function:
1583   ! * C++ version: x64 SIMD support using C++ intrinsics
1584   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1585   !
1586   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1587   ! Reference Manual in order  to  find  out  how to activate SIMD support
1588   ! in ALGLIB.
1589 
1590   ! COMMERCIAL EDITION OF ALGLIB:
1591   !
1592   ! Commercial Edition of ALGLIB includes following important improvements
1593   ! of this function:
1594   ! * high-performance native backend with same C# interface (C# version)
1595   ! * multithreading support (C++ and C# versions)
1596   ! * hardware vendor (Intel) implementations of linear algebra primitives
1597   !   (C++ and C# versions, x86/x64 platform)
1598   !
1599   ! We recommend you to read 'Working with commercial version' section  of
1600   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1601   ! related features provided by commercial edition of ALGLIB.
1602 
1603   -- LAPACK routine (version 3.0) --
1604      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1605      Courant Institute, Argonne National Lab, and Rice University
1606      September 30, 1994
1607 *************************************************************************/
cmatrixqr(complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_1d_array & tau,const xparams _xparams)1608 void cmatrixqr(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau, const xparams _xparams)
1609 {
1610     jmp_buf _break_jump;
1611     alglib_impl::ae_state _alglib_env_state;
1612     alglib_impl::ae_state_init(&_alglib_env_state);
1613     if( setjmp(_break_jump) )
1614     {
1615 #if !defined(AE_NO_EXCEPTIONS)
1616         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1617 #else
1618         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1619         return;
1620 #endif
1621     }
1622     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1623     if( _xparams.flags!=0x0 )
1624         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1625     alglib_impl::cmatrixqr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1626     alglib_impl::ae_state_clear(&_alglib_env_state);
1627     return;
1628 }
1629 
1630 /*************************************************************************
1631 LQ decomposition of a rectangular complex matrix of size MxN
1632 
1633 Input parameters:
1634     A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
1635     M   -   number of rows in matrix A.
1636     N   -   number of columns in matrix A.
1637 
1638 Output parameters:
1639     A   -   matrices Q and L in compact form
1640     Tau -   array of scalar factors which are used to form matrix Q. Array
1641             whose indexes range within [0.. Min(M,N)-1]
1642 
1643 Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
1644 MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.
1645 
1646   ! FREE EDITION OF ALGLIB:
1647   !
1648   ! Free Edition of ALGLIB supports following important features for  this
1649   ! function:
1650   ! * C++ version: x64 SIMD support using C++ intrinsics
1651   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1652   !
1653   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1654   ! Reference Manual in order  to  find  out  how to activate SIMD support
1655   ! in ALGLIB.
1656 
1657   ! COMMERCIAL EDITION OF ALGLIB:
1658   !
1659   ! Commercial Edition of ALGLIB includes following important improvements
1660   ! of this function:
1661   ! * high-performance native backend with same C# interface (C# version)
1662   ! * multithreading support (C++ and C# versions)
1663   ! * hardware vendor (Intel) implementations of linear algebra primitives
1664   !   (C++ and C# versions, x86/x64 platform)
1665   !
1666   ! We recommend you to read 'Working with commercial version' section  of
1667   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1668   ! related features provided by commercial edition of ALGLIB.
1669 
1670   -- LAPACK routine (version 3.0) --
1671      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1672      Courant Institute, Argonne National Lab, and Rice University
1673      September 30, 1994
1674 *************************************************************************/
cmatrixlq(complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_1d_array & tau,const xparams _xparams)1675 void cmatrixlq(complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_1d_array &tau, const xparams _xparams)
1676 {
1677     jmp_buf _break_jump;
1678     alglib_impl::ae_state _alglib_env_state;
1679     alglib_impl::ae_state_init(&_alglib_env_state);
1680     if( setjmp(_break_jump) )
1681     {
1682 #if !defined(AE_NO_EXCEPTIONS)
1683         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1684 #else
1685         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1686         return;
1687 #endif
1688     }
1689     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1690     if( _xparams.flags!=0x0 )
1691         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1692     alglib_impl::cmatrixlq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
1693     alglib_impl::ae_state_clear(&_alglib_env_state);
1694     return;
1695 }
1696 
1697 /*************************************************************************
1698 Partial unpacking of matrix Q from the QR decomposition of a matrix A
1699 
1700 Input parameters:
1701     A       -   matrices Q and R in compact form.
1702                 Output of RMatrixQR subroutine.
1703     M       -   number of rows in given matrix A. M>=0.
1704     N       -   number of columns in given matrix A. N>=0.
1705     Tau     -   scalar factors which are used to form Q.
1706                 Output of the RMatrixQR subroutine.
1707     QColumns -  required number of columns of matrix Q. M>=QColumns>=0.
1708 
1709 Output parameters:
1710     Q       -   first QColumns columns of matrix Q.
1711                 Array whose indexes range within [0..M-1, 0..QColumns-1].
1712                 If QColumns=0, the array remains unchanged.
1713 
1714   ! FREE EDITION OF ALGLIB:
1715   !
1716   ! Free Edition of ALGLIB supports following important features for  this
1717   ! function:
1718   ! * C++ version: x64 SIMD support using C++ intrinsics
1719   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1720   !
1721   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1722   ! Reference Manual in order  to  find  out  how to activate SIMD support
1723   ! in ALGLIB.
1724 
1725   ! COMMERCIAL EDITION OF ALGLIB:
1726   !
1727   ! Commercial Edition of ALGLIB includes following important improvements
1728   ! of this function:
1729   ! * high-performance native backend with same C# interface (C# version)
1730   ! * multithreading support (C++ and C# versions)
1731   ! * hardware vendor (Intel) implementations of linear algebra primitives
1732   !   (C++ and C# versions, x86/x64 platform)
1733   !
1734   ! We recommend you to read 'Working with commercial version' section  of
1735   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1736   ! related features provided by commercial edition of ALGLIB.
1737 
1738   -- ALGLIB routine --
1739      17.02.2010
1740      Bochkanov Sergey
1741 *************************************************************************/
rmatrixqrunpackq(const real_2d_array & a,const ae_int_t m,const ae_int_t n,const real_1d_array & tau,const ae_int_t qcolumns,real_2d_array & q,const xparams _xparams)1742 void rmatrixqrunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qcolumns, real_2d_array &q, const xparams _xparams)
1743 {
1744     jmp_buf _break_jump;
1745     alglib_impl::ae_state _alglib_env_state;
1746     alglib_impl::ae_state_init(&_alglib_env_state);
1747     if( setjmp(_break_jump) )
1748     {
1749 #if !defined(AE_NO_EXCEPTIONS)
1750         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1751 #else
1752         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1753         return;
1754 #endif
1755     }
1756     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1757     if( _xparams.flags!=0x0 )
1758         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1759     alglib_impl::rmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
1760     alglib_impl::ae_state_clear(&_alglib_env_state);
1761     return;
1762 }
1763 
1764 /*************************************************************************
1765 Unpacking of matrix R from the QR decomposition of a matrix A
1766 
1767 Input parameters:
1768     A       -   matrices Q and R in compact form.
1769                 Output of RMatrixQR subroutine.
1770     M       -   number of rows in given matrix A. M>=0.
1771     N       -   number of columns in given matrix A. N>=0.
1772 
1773 Output parameters:
1774     R       -   matrix R, array[0..M-1, 0..N-1].
1775 
1776   -- ALGLIB routine --
1777      17.02.2010
1778      Bochkanov Sergey
1779 *************************************************************************/
rmatrixqrunpackr(const real_2d_array & a,const ae_int_t m,const ae_int_t n,real_2d_array & r,const xparams _xparams)1780 void rmatrixqrunpackr(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &r, const xparams _xparams)
1781 {
1782     jmp_buf _break_jump;
1783     alglib_impl::ae_state _alglib_env_state;
1784     alglib_impl::ae_state_init(&_alglib_env_state);
1785     if( setjmp(_break_jump) )
1786     {
1787 #if !defined(AE_NO_EXCEPTIONS)
1788         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1789 #else
1790         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1791         return;
1792 #endif
1793     }
1794     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1795     if( _xparams.flags!=0x0 )
1796         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1797     alglib_impl::rmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state);
1798     alglib_impl::ae_state_clear(&_alglib_env_state);
1799     return;
1800 }
1801 
1802 /*************************************************************************
1803 Partial unpacking of matrix Q from the LQ decomposition of a matrix A
1804 
1805 Input parameters:
1806     A       -   matrices L and Q in compact form.
1807                 Output of RMatrixLQ subroutine.
1808     M       -   number of rows in given matrix A. M>=0.
1809     N       -   number of columns in given matrix A. N>=0.
1810     Tau     -   scalar factors which are used to form Q.
1811                 Output of the RMatrixLQ subroutine.
1812     QRows   -   required number of rows in matrix Q. N>=QRows>=0.
1813 
1814 Output parameters:
1815     Q       -   first QRows rows of matrix Q. Array whose indexes range
1816                 within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
1817                 unchanged.
1818 
1819   ! FREE EDITION OF ALGLIB:
1820   !
1821   ! Free Edition of ALGLIB supports following important features for  this
1822   ! function:
1823   ! * C++ version: x64 SIMD support using C++ intrinsics
1824   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1825   !
1826   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1827   ! Reference Manual in order  to  find  out  how to activate SIMD support
1828   ! in ALGLIB.
1829 
1830   ! COMMERCIAL EDITION OF ALGLIB:
1831   !
1832   ! Commercial Edition of ALGLIB includes following important improvements
1833   ! of this function:
1834   ! * high-performance native backend with same C# interface (C# version)
1835   ! * multithreading support (C++ and C# versions)
1836   ! * hardware vendor (Intel) implementations of linear algebra primitives
1837   !   (C++ and C# versions, x86/x64 platform)
1838   !
1839   ! We recommend you to read 'Working with commercial version' section  of
1840   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1841   ! related features provided by commercial edition of ALGLIB.
1842 
1843   -- ALGLIB routine --
1844      17.02.2010
1845      Bochkanov Sergey
1846 *************************************************************************/
rmatrixlqunpackq(const real_2d_array & a,const ae_int_t m,const ae_int_t n,const real_1d_array & tau,const ae_int_t qrows,real_2d_array & q,const xparams _xparams)1847 void rmatrixlqunpackq(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const real_1d_array &tau, const ae_int_t qrows, real_2d_array &q, const xparams _xparams)
1848 {
1849     jmp_buf _break_jump;
1850     alglib_impl::ae_state _alglib_env_state;
1851     alglib_impl::ae_state_init(&_alglib_env_state);
1852     if( setjmp(_break_jump) )
1853     {
1854 #if !defined(AE_NO_EXCEPTIONS)
1855         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1856 #else
1857         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1858         return;
1859 #endif
1860     }
1861     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1862     if( _xparams.flags!=0x0 )
1863         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1864     alglib_impl::rmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
1865     alglib_impl::ae_state_clear(&_alglib_env_state);
1866     return;
1867 }
1868 
1869 /*************************************************************************
1870 Unpacking of matrix L from the LQ decomposition of a matrix A
1871 
1872 Input parameters:
1873     A       -   matrices Q and L in compact form.
1874                 Output of RMatrixLQ subroutine.
1875     M       -   number of rows in given matrix A. M>=0.
1876     N       -   number of columns in given matrix A. N>=0.
1877 
1878 Output parameters:
1879     L       -   matrix L, array[0..M-1, 0..N-1].
1880 
1881   -- ALGLIB routine --
1882      17.02.2010
1883      Bochkanov Sergey
1884 *************************************************************************/
rmatrixlqunpackl(const real_2d_array & a,const ae_int_t m,const ae_int_t n,real_2d_array & l,const xparams _xparams)1885 void rmatrixlqunpackl(const real_2d_array &a, const ae_int_t m, const ae_int_t n, real_2d_array &l, const xparams _xparams)
1886 {
1887     jmp_buf _break_jump;
1888     alglib_impl::ae_state _alglib_env_state;
1889     alglib_impl::ae_state_init(&_alglib_env_state);
1890     if( setjmp(_break_jump) )
1891     {
1892 #if !defined(AE_NO_EXCEPTIONS)
1893         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1894 #else
1895         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1896         return;
1897 #endif
1898     }
1899     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1900     if( _xparams.flags!=0x0 )
1901         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1902     alglib_impl::rmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state);
1903     alglib_impl::ae_state_clear(&_alglib_env_state);
1904     return;
1905 }
1906 
1907 /*************************************************************************
1908 Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
1909 
1910 Input parameters:
1911     A           -   matrices Q and R in compact form.
1912                     Output of CMatrixQR subroutine .
1913     M           -   number of rows in matrix A. M>=0.
1914     N           -   number of columns in matrix A. N>=0.
1915     Tau         -   scalar factors which are used to form Q.
1916                     Output of CMatrixQR subroutine .
1917     QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
1918 
1919 Output parameters:
1920     Q           -   first QColumns columns of matrix Q.
1921                     Array whose index ranges within [0..M-1, 0..QColumns-1].
1922                     If QColumns=0, array isn't changed.
1923 
1924   ! FREE EDITION OF ALGLIB:
1925   !
1926   ! Free Edition of ALGLIB supports following important features for  this
1927   ! function:
1928   ! * C++ version: x64 SIMD support using C++ intrinsics
1929   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
1930   !
1931   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
1932   ! Reference Manual in order  to  find  out  how to activate SIMD support
1933   ! in ALGLIB.
1934 
1935   ! COMMERCIAL EDITION OF ALGLIB:
1936   !
1937   ! Commercial Edition of ALGLIB includes following important improvements
1938   ! of this function:
1939   ! * high-performance native backend with same C# interface (C# version)
1940   ! * multithreading support (C++ and C# versions)
1941   ! * hardware vendor (Intel) implementations of linear algebra primitives
1942   !   (C++ and C# versions, x86/x64 platform)
1943   !
1944   ! We recommend you to read 'Working with commercial version' section  of
1945   ! ALGLIB Reference Manual in order to find out how to  use  performance-
1946   ! related features provided by commercial edition of ALGLIB.
1947 
1948   -- ALGLIB routine --
1949      17.02.2010
1950      Bochkanov Sergey
1951 *************************************************************************/
cmatrixqrunpackq(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,const complex_1d_array & tau,const ae_int_t qcolumns,complex_2d_array & q,const xparams _xparams)1952 void cmatrixqrunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qcolumns, complex_2d_array &q, const xparams _xparams)
1953 {
1954     jmp_buf _break_jump;
1955     alglib_impl::ae_state _alglib_env_state;
1956     alglib_impl::ae_state_init(&_alglib_env_state);
1957     if( setjmp(_break_jump) )
1958     {
1959 #if !defined(AE_NO_EXCEPTIONS)
1960         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1961 #else
1962         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1963         return;
1964 #endif
1965     }
1966     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1967     if( _xparams.flags!=0x0 )
1968         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1969     alglib_impl::cmatrixqrunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
1970     alglib_impl::ae_state_clear(&_alglib_env_state);
1971     return;
1972 }
1973 
1974 /*************************************************************************
1975 Unpacking of matrix R from the QR decomposition of a matrix A
1976 
1977 Input parameters:
1978     A       -   matrices Q and R in compact form.
1979                 Output of CMatrixQR subroutine.
1980     M       -   number of rows in given matrix A. M>=0.
1981     N       -   number of columns in given matrix A. N>=0.
1982 
1983 Output parameters:
1984     R       -   matrix R, array[0..M-1, 0..N-1].
1985 
1986   -- ALGLIB routine --
1987      17.02.2010
1988      Bochkanov Sergey
1989 *************************************************************************/
cmatrixqrunpackr(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_2d_array & r,const xparams _xparams)1990 void cmatrixqrunpackr(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &r, const xparams _xparams)
1991 {
1992     jmp_buf _break_jump;
1993     alglib_impl::ae_state _alglib_env_state;
1994     alglib_impl::ae_state_init(&_alglib_env_state);
1995     if( setjmp(_break_jump) )
1996     {
1997 #if !defined(AE_NO_EXCEPTIONS)
1998         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1999 #else
2000         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2001         return;
2002 #endif
2003     }
2004     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2005     if( _xparams.flags!=0x0 )
2006         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2007     alglib_impl::cmatrixqrunpackr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &_alglib_env_state);
2008     alglib_impl::ae_state_clear(&_alglib_env_state);
2009     return;
2010 }
2011 
2012 /*************************************************************************
2013 Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
2014 
2015 Input parameters:
2016     A           -   matrices Q and L in compact form.
2017                     Output of CMatrixLQ subroutine.
2018     M           -   number of rows in matrix A. M>=0.
2019     N           -   number of columns in matrix A. N>=0.
2020     Tau         -   scalar factors which are used to form Q.
2021                     Output of CMatrixLQ subroutine.
2022     QRows       -   required number of rows in matrix Q. N>=QRows>=0.
2023 
2024 Output parameters:
2025     Q           -   first QRows rows of matrix Q.
2026                     Array whose index ranges within [0..QRows-1, 0..N-1].
2027                     If QRows=0, array isn't changed.
2028 
2029   ! FREE EDITION OF ALGLIB:
2030   !
2031   ! Free Edition of ALGLIB supports following important features for  this
2032   ! function:
2033   ! * C++ version: x64 SIMD support using C++ intrinsics
2034   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
2035   !
2036   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
2037   ! Reference Manual in order  to  find  out  how to activate SIMD support
2038   ! in ALGLIB.
2039 
2040   ! COMMERCIAL EDITION OF ALGLIB:
2041   !
2042   ! Commercial Edition of ALGLIB includes following important improvements
2043   ! of this function:
2044   ! * high-performance native backend with same C# interface (C# version)
2045   ! * multithreading support (C++ and C# versions)
2046   ! * hardware vendor (Intel) implementations of linear algebra primitives
2047   !   (C++ and C# versions, x86/x64 platform)
2048   !
2049   ! We recommend you to read 'Working with commercial version' section  of
2050   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2051   ! related features provided by commercial edition of ALGLIB.
2052 
2053   -- ALGLIB routine --
2054      17.02.2010
2055      Bochkanov Sergey
2056 *************************************************************************/
cmatrixlqunpackq(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,const complex_1d_array & tau,const ae_int_t qrows,complex_2d_array & q,const xparams _xparams)2057 void cmatrixlqunpackq(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, const complex_1d_array &tau, const ae_int_t qrows, complex_2d_array &q, const xparams _xparams)
2058 {
2059     jmp_buf _break_jump;
2060     alglib_impl::ae_state _alglib_env_state;
2061     alglib_impl::ae_state_init(&_alglib_env_state);
2062     if( setjmp(_break_jump) )
2063     {
2064 #if !defined(AE_NO_EXCEPTIONS)
2065         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2066 #else
2067         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2068         return;
2069 #endif
2070     }
2071     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2072     if( _xparams.flags!=0x0 )
2073         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2074     alglib_impl::cmatrixlqunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), qrows, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2075     alglib_impl::ae_state_clear(&_alglib_env_state);
2076     return;
2077 }
2078 
2079 /*************************************************************************
2080 Unpacking of matrix L from the LQ decomposition of a matrix A
2081 
2082 Input parameters:
2083     A       -   matrices Q and L in compact form.
2084                 Output of CMatrixLQ subroutine.
2085     M       -   number of rows in given matrix A. M>=0.
2086     N       -   number of columns in given matrix A. N>=0.
2087 
2088 Output parameters:
2089     L       -   matrix L, array[0..M-1, 0..N-1].
2090 
2091   -- ALGLIB routine --
2092      17.02.2010
2093      Bochkanov Sergey
2094 *************************************************************************/
cmatrixlqunpackl(const complex_2d_array & a,const ae_int_t m,const ae_int_t n,complex_2d_array & l,const xparams _xparams)2095 void cmatrixlqunpackl(const complex_2d_array &a, const ae_int_t m, const ae_int_t n, complex_2d_array &l, const xparams _xparams)
2096 {
2097     jmp_buf _break_jump;
2098     alglib_impl::ae_state _alglib_env_state;
2099     alglib_impl::ae_state_init(&_alglib_env_state);
2100     if( setjmp(_break_jump) )
2101     {
2102 #if !defined(AE_NO_EXCEPTIONS)
2103         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2104 #else
2105         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2106         return;
2107 #endif
2108     }
2109     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2110     if( _xparams.flags!=0x0 )
2111         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2112     alglib_impl::cmatrixlqunpackl(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_matrix*>(l.c_ptr()), &_alglib_env_state);
2113     alglib_impl::ae_state_clear(&_alglib_env_state);
2114     return;
2115 }
2116 
2117 /*************************************************************************
2118 Reduction of a rectangular matrix to  bidiagonal form
2119 
2120 The algorithm reduces the rectangular matrix A to  bidiagonal form by
2121 orthogonal transformations P and Q: A = Q*B*(P^T).
2122 
2123   ! COMMERCIAL EDITION OF ALGLIB:
2124   !
2125   ! Commercial Edition of ALGLIB includes following important improvements
2126   ! of this function:
2127   ! * high-performance native backend with same C# interface (C# version)
2128   ! * hardware vendor (Intel) implementations of linear algebra primitives
2129   !   (C++ and C# versions, x86/x64 platform)
2130   !
2131   ! We recommend you to read 'Working with commercial version' section  of
2132   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2133   ! related features provided by commercial edition of ALGLIB.
2134 
2135 Input parameters:
2136     A       -   source matrix. array[0..M-1, 0..N-1]
2137     M       -   number of rows in matrix A.
2138     N       -   number of columns in matrix A.
2139 
2140 Output parameters:
2141     A       -   matrices Q, B, P in compact form (see below).
2142     TauQ    -   scalar factors which are used to form matrix Q.
2143     TauP    -   scalar factors which are used to form matrix P.
2144 
2145 The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
2146 replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
2147 reflections which form MxM matrix Q and NxN matrix P, respectively.
2148 
2149 If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
2150 corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
2151 product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
2152 H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
2153 vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
2154 stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
2155 G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
2156 u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
2157 
2158 If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
2159 corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
2160 H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
2161 is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
2162 G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
2163 is stored in A(i,i+1:n-1).
2164 
2165 EXAMPLE:
2166 
2167 m=6, n=5 (m > n):               m=5, n=6 (m < n):
2168 
2169 (  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
2170 (  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
2171 (  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
2172 (  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
2173 (  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
2174 (  v1  v2  v3  v4  v5 )
2175 
2176 Here vi and ui are vectors which form H(i) and G(i), and d and e -
2177 are the diagonal and off-diagonal elements of matrix B.
2178 
2179   -- LAPACK routine (version 3.0) --
2180      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2181      Courant Institute, Argonne National Lab, and Rice University
2182      September 30, 1994.
2183      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
2184      pseudocode, 2007-2010.
2185 *************************************************************************/
rmatrixbd(real_2d_array & a,const ae_int_t m,const ae_int_t n,real_1d_array & tauq,real_1d_array & taup,const xparams _xparams)2186 void rmatrixbd(real_2d_array &a, const ae_int_t m, const ae_int_t n, real_1d_array &tauq, real_1d_array &taup, const xparams _xparams)
2187 {
2188     jmp_buf _break_jump;
2189     alglib_impl::ae_state _alglib_env_state;
2190     alglib_impl::ae_state_init(&_alglib_env_state);
2191     if( setjmp(_break_jump) )
2192     {
2193 #if !defined(AE_NO_EXCEPTIONS)
2194         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2195 #else
2196         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2197         return;
2198 #endif
2199     }
2200     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2201     if( _xparams.flags!=0x0 )
2202         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2203     alglib_impl::rmatrixbd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), &_alglib_env_state);
2204     alglib_impl::ae_state_clear(&_alglib_env_state);
2205     return;
2206 }
2207 
2208 /*************************************************************************
2209 Unpacking matrix Q which reduces a matrix to bidiagonal form.
2210 
2211   ! COMMERCIAL EDITION OF ALGLIB:
2212   !
2213   ! Commercial Edition of ALGLIB includes following important improvements
2214   ! of this function:
2215   ! * high-performance native backend with same C# interface (C# version)
2216   ! * hardware vendor (Intel) implementations of linear algebra primitives
2217   !   (C++ and C# versions, x86/x64 platform)
2218   !
2219   ! We recommend you to read 'Working with commercial version' section  of
2220   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2221   ! related features provided by commercial edition of ALGLIB.
2222 
2223 Input parameters:
2224     QP          -   matrices Q and P in compact form.
2225                     Output of ToBidiagonal subroutine.
2226     M           -   number of rows in matrix A.
2227     N           -   number of columns in matrix A.
2228     TAUQ        -   scalar factors which are used to form Q.
2229                     Output of ToBidiagonal subroutine.
2230     QColumns    -   required number of columns in matrix Q.
2231                     M>=QColumns>=0.
2232 
2233 Output parameters:
2234     Q           -   first QColumns columns of matrix Q.
2235                     Array[0..M-1, 0..QColumns-1]
2236                     If QColumns=0, the array is not modified.
2237 
2238   -- ALGLIB --
2239      2005-2010
2240      Bochkanov Sergey
2241 *************************************************************************/
rmatrixbdunpackq(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & tauq,const ae_int_t qcolumns,real_2d_array & q,const xparams _xparams)2242 void rmatrixbdunpackq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, const ae_int_t qcolumns, real_2d_array &q, const xparams _xparams)
2243 {
2244     jmp_buf _break_jump;
2245     alglib_impl::ae_state _alglib_env_state;
2246     alglib_impl::ae_state_init(&_alglib_env_state);
2247     if( setjmp(_break_jump) )
2248     {
2249 #if !defined(AE_NO_EXCEPTIONS)
2250         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2251 #else
2252         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2253         return;
2254 #endif
2255     }
2256     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2257     if( _xparams.flags!=0x0 )
2258         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2259     alglib_impl::rmatrixbdunpackq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), qcolumns, const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2260     alglib_impl::ae_state_clear(&_alglib_env_state);
2261     return;
2262 }
2263 
2264 /*************************************************************************
2265 Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
2266 
2267 The algorithm allows pre- or post-multiply by Q or Q'.
2268 
2269   ! COMMERCIAL EDITION OF ALGLIB:
2270   !
2271   ! Commercial Edition of ALGLIB includes following important improvements
2272   ! of this function:
2273   ! * high-performance native backend with same C# interface (C# version)
2274   ! * hardware vendor (Intel) implementations of linear algebra primitives
2275   !   (C++ and C# versions, x86/x64 platform)
2276   !
2277   ! We recommend you to read 'Working with commercial version' section  of
2278   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2279   ! related features provided by commercial edition of ALGLIB.
2280 
2281 Input parameters:
2282     QP          -   matrices Q and P in compact form.
2283                     Output of ToBidiagonal subroutine.
2284     M           -   number of rows in matrix A.
2285     N           -   number of columns in matrix A.
2286     TAUQ        -   scalar factors which are used to form Q.
2287                     Output of ToBidiagonal subroutine.
2288     Z           -   multiplied matrix.
2289                     array[0..ZRows-1,0..ZColumns-1]
2290     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
2291                     ZRows=M, otherwise ZRows can be arbitrary.
2292     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
2293                     ZColumns=M, otherwise ZColumns can be arbitrary.
2294     FromTheRight -  pre- or post-multiply.
2295     DoTranspose -   multiply by Q or Q'.
2296 
2297 Output parameters:
2298     Z           -   product of Z and Q.
2299                     Array[0..ZRows-1,0..ZColumns-1]
2300                     If ZRows=0 or ZColumns=0, the array is not modified.
2301 
2302   -- ALGLIB --
2303      2005-2010
2304      Bochkanov Sergey
2305 *************************************************************************/
rmatrixbdmultiplybyq(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & tauq,real_2d_array & z,const ae_int_t zrows,const ae_int_t zcolumns,const bool fromtheright,const bool dotranspose,const xparams _xparams)2306 void rmatrixbdmultiplybyq(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &tauq, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose, const xparams _xparams)
2307 {
2308     jmp_buf _break_jump;
2309     alglib_impl::ae_state _alglib_env_state;
2310     alglib_impl::ae_state_init(&_alglib_env_state);
2311     if( setjmp(_break_jump) )
2312     {
2313 #if !defined(AE_NO_EXCEPTIONS)
2314         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2315 #else
2316         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2317         return;
2318 #endif
2319     }
2320     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2321     if( _xparams.flags!=0x0 )
2322         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2323     alglib_impl::rmatrixbdmultiplybyq(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(tauq.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state);
2324     alglib_impl::ae_state_clear(&_alglib_env_state);
2325     return;
2326 }
2327 
2328 /*************************************************************************
2329 Unpacking matrix P which reduces matrix A to bidiagonal form.
2330 The subroutine returns transposed matrix P.
2331 
2332 Input parameters:
2333     QP      -   matrices Q and P in compact form.
2334                 Output of ToBidiagonal subroutine.
2335     M       -   number of rows in matrix A.
2336     N       -   number of columns in matrix A.
2337     TAUP    -   scalar factors which are used to form P.
2338                 Output of ToBidiagonal subroutine.
2339     PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
2340 
2341 Output parameters:
2342     PT      -   first PTRows rows of matrix P^T
2343                 Array[0..PTRows-1, 0..N-1]
2344                 If PTRows=0, the array is not modified.
2345 
2346   -- ALGLIB --
2347      2005-2010
2348      Bochkanov Sergey
2349 *************************************************************************/
rmatrixbdunpackpt(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & taup,const ae_int_t ptrows,real_2d_array & pt,const xparams _xparams)2350 void rmatrixbdunpackpt(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, const ae_int_t ptrows, real_2d_array &pt, const xparams _xparams)
2351 {
2352     jmp_buf _break_jump;
2353     alglib_impl::ae_state _alglib_env_state;
2354     alglib_impl::ae_state_init(&_alglib_env_state);
2355     if( setjmp(_break_jump) )
2356     {
2357 #if !defined(AE_NO_EXCEPTIONS)
2358         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2359 #else
2360         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2361         return;
2362 #endif
2363     }
2364     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2365     if( _xparams.flags!=0x0 )
2366         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2367     alglib_impl::rmatrixbdunpackpt(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), ptrows, const_cast<alglib_impl::ae_matrix*>(pt.c_ptr()), &_alglib_env_state);
2368     alglib_impl::ae_state_clear(&_alglib_env_state);
2369     return;
2370 }
2371 
2372 /*************************************************************************
2373 Multiplication by matrix P which reduces matrix A to  bidiagonal form.
2374 
2375 The algorithm allows pre- or post-multiply by P or P'.
2376 
2377 Input parameters:
2378     QP          -   matrices Q and P in compact form.
2379                     Output of RMatrixBD subroutine.
2380     M           -   number of rows in matrix A.
2381     N           -   number of columns in matrix A.
2382     TAUP        -   scalar factors which are used to form P.
2383                     Output of RMatrixBD subroutine.
2384     Z           -   multiplied matrix.
2385                     Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
2386     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
2387                     ZRows=N, otherwise ZRows can be arbitrary.
2388     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
2389                     ZColumns=N, otherwise ZColumns can be arbitrary.
2390     FromTheRight -  pre- or post-multiply.
2391     DoTranspose -   multiply by P or P'.
2392 
2393 Output parameters:
2394     Z - product of Z and P.
2395                 Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
2396                 If ZRows=0 or ZColumns=0, the array is not modified.
2397 
2398   -- ALGLIB --
2399      2005-2010
2400      Bochkanov Sergey
2401 *************************************************************************/
rmatrixbdmultiplybyp(const real_2d_array & qp,const ae_int_t m,const ae_int_t n,const real_1d_array & taup,real_2d_array & z,const ae_int_t zrows,const ae_int_t zcolumns,const bool fromtheright,const bool dotranspose,const xparams _xparams)2402 void rmatrixbdmultiplybyp(const real_2d_array &qp, const ae_int_t m, const ae_int_t n, const real_1d_array &taup, real_2d_array &z, const ae_int_t zrows, const ae_int_t zcolumns, const bool fromtheright, const bool dotranspose, const xparams _xparams)
2403 {
2404     jmp_buf _break_jump;
2405     alglib_impl::ae_state _alglib_env_state;
2406     alglib_impl::ae_state_init(&_alglib_env_state);
2407     if( setjmp(_break_jump) )
2408     {
2409 #if !defined(AE_NO_EXCEPTIONS)
2410         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2411 #else
2412         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2413         return;
2414 #endif
2415     }
2416     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2417     if( _xparams.flags!=0x0 )
2418         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2419     alglib_impl::rmatrixbdmultiplybyp(const_cast<alglib_impl::ae_matrix*>(qp.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(taup.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), zrows, zcolumns, fromtheright, dotranspose, &_alglib_env_state);
2420     alglib_impl::ae_state_clear(&_alglib_env_state);
2421     return;
2422 }
2423 
2424 /*************************************************************************
2425 Unpacking of the main and secondary diagonals of bidiagonal decomposition
2426 of matrix A.
2427 
2428 Input parameters:
2429     B   -   output of RMatrixBD subroutine.
2430     M   -   number of rows in matrix B.
2431     N   -   number of columns in matrix B.
2432 
2433 Output parameters:
2434     IsUpper -   True, if the matrix is upper bidiagonal.
2435                 otherwise IsUpper is False.
2436     D       -   the main diagonal.
2437                 Array whose index ranges within [0..Min(M,N)-1].
2438     E       -   the secondary diagonal (upper or lower, depending on
2439                 the value of IsUpper).
2440                 Array index ranges within [0..Min(M,N)-1], the last
2441                 element is not used.
2442 
2443   -- ALGLIB --
2444      2005-2010
2445      Bochkanov Sergey
2446 *************************************************************************/
rmatrixbdunpackdiagonals(const real_2d_array & b,const ae_int_t m,const ae_int_t n,bool & isupper,real_1d_array & d,real_1d_array & e,const xparams _xparams)2447 void rmatrixbdunpackdiagonals(const real_2d_array &b, const ae_int_t m, const ae_int_t n, bool &isupper, real_1d_array &d, real_1d_array &e, const xparams _xparams)
2448 {
2449     jmp_buf _break_jump;
2450     alglib_impl::ae_state _alglib_env_state;
2451     alglib_impl::ae_state_init(&_alglib_env_state);
2452     if( setjmp(_break_jump) )
2453     {
2454 #if !defined(AE_NO_EXCEPTIONS)
2455         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2456 #else
2457         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2458         return;
2459 #endif
2460     }
2461     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2462     if( _xparams.flags!=0x0 )
2463         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2464     alglib_impl::rmatrixbdunpackdiagonals(const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), m, n, &isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
2465     alglib_impl::ae_state_clear(&_alglib_env_state);
2466     return;
2467 }
2468 
2469 /*************************************************************************
2470 Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
2471 where Q is an orthogonal matrix, H - Hessenberg matrix.
2472 
2473   ! COMMERCIAL EDITION OF ALGLIB:
2474   !
2475   ! Commercial Edition of ALGLIB includes following important improvements
2476   ! of this function:
2477   ! * high-performance native backend with same C# interface (C# version)
2478   ! * hardware vendor (Intel) implementations of linear algebra primitives
2479   !   (C++ and C# versions, x86/x64 platform)
2480   !
2481   ! We recommend you to read 'Working with commercial version' section  of
2482   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2483   ! related features provided by commercial edition of ALGLIB.
2484 
2485 Input parameters:
2486     A       -   matrix A with elements [0..N-1, 0..N-1]
2487     N       -   size of matrix A.
2488 
2489 Output parameters:
2490     A       -   matrices Q and P in  compact form (see below).
2491     Tau     -   array of scalar factors which are used to form matrix Q.
2492                 Array whose index ranges within [0..N-2]
2493 
2494 Matrix H is located on the main diagonal, on the lower secondary  diagonal
2495 and above the main diagonal of matrix A. The elements which are used to
2496 form matrix Q are situated in array Tau and below the lower secondary
2497 diagonal of matrix A as follows:
2498 
2499 Matrix Q is represented as a product of elementary reflections
2500 
2501 Q = H(0)*H(1)*...*H(n-2),
2502 
2503 where each H(i) is given by
2504 
2505 H(i) = 1 - tau * v * (v^T)
2506 
2507 where tau is a scalar stored in Tau[I]; v - is a real vector,
2508 so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
2509 
2510   -- LAPACK routine (version 3.0) --
2511      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2512      Courant Institute, Argonne National Lab, and Rice University
2513      October 31, 1992
2514 *************************************************************************/
rmatrixhessenberg(real_2d_array & a,const ae_int_t n,real_1d_array & tau,const xparams _xparams)2515 void rmatrixhessenberg(real_2d_array &a, const ae_int_t n, real_1d_array &tau, const xparams _xparams)
2516 {
2517     jmp_buf _break_jump;
2518     alglib_impl::ae_state _alglib_env_state;
2519     alglib_impl::ae_state_init(&_alglib_env_state);
2520     if( setjmp(_break_jump) )
2521     {
2522 #if !defined(AE_NO_EXCEPTIONS)
2523         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2524 #else
2525         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2526         return;
2527 #endif
2528     }
2529     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2530     if( _xparams.flags!=0x0 )
2531         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2532     alglib_impl::rmatrixhessenberg(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), &_alglib_env_state);
2533     alglib_impl::ae_state_clear(&_alglib_env_state);
2534     return;
2535 }
2536 
2537 /*************************************************************************
2538 Unpacking matrix Q which reduces matrix A to upper Hessenberg form
2539 
2540   ! COMMERCIAL EDITION OF ALGLIB:
2541   !
2542   ! Commercial Edition of ALGLIB includes following important improvements
2543   ! of this function:
2544   ! * high-performance native backend with same C# interface (C# version)
2545   ! * hardware vendor (Intel) implementations of linear algebra primitives
2546   !   (C++ and C# versions, x86/x64 platform)
2547   !
2548   ! We recommend you to read 'Working with commercial version' section  of
2549   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2550   ! related features provided by commercial edition of ALGLIB.
2551 
2552 Input parameters:
2553     A   -   output of RMatrixHessenberg subroutine.
2554     N   -   size of matrix A.
2555     Tau -   scalar factors which are used to form Q.
2556             Output of RMatrixHessenberg subroutine.
2557 
2558 Output parameters:
2559     Q   -   matrix Q.
2560             Array whose indexes range within [0..N-1, 0..N-1].
2561 
2562   -- ALGLIB --
2563      2005-2010
2564      Bochkanov Sergey
2565 *************************************************************************/
rmatrixhessenbergunpackq(const real_2d_array & a,const ae_int_t n,const real_1d_array & tau,real_2d_array & q,const xparams _xparams)2566 void rmatrixhessenbergunpackq(const real_2d_array &a, const ae_int_t n, const real_1d_array &tau, real_2d_array &q, const xparams _xparams)
2567 {
2568     jmp_buf _break_jump;
2569     alglib_impl::ae_state _alglib_env_state;
2570     alglib_impl::ae_state_init(&_alglib_env_state);
2571     if( setjmp(_break_jump) )
2572     {
2573 #if !defined(AE_NO_EXCEPTIONS)
2574         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2575 #else
2576         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2577         return;
2578 #endif
2579     }
2580     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2581     if( _xparams.flags!=0x0 )
2582         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2583     alglib_impl::rmatrixhessenbergunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2584     alglib_impl::ae_state_clear(&_alglib_env_state);
2585     return;
2586 }
2587 
2588 /*************************************************************************
2589 Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
2590 
2591 Input parameters:
2592     A   -   output of RMatrixHessenberg subroutine.
2593     N   -   size of matrix A.
2594 
2595 Output parameters:
2596     H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
2597 
2598   -- ALGLIB --
2599      2005-2010
2600      Bochkanov Sergey
2601 *************************************************************************/
rmatrixhessenbergunpackh(const real_2d_array & a,const ae_int_t n,real_2d_array & h,const xparams _xparams)2602 void rmatrixhessenbergunpackh(const real_2d_array &a, const ae_int_t n, real_2d_array &h, const xparams _xparams)
2603 {
2604     jmp_buf _break_jump;
2605     alglib_impl::ae_state _alglib_env_state;
2606     alglib_impl::ae_state_init(&_alglib_env_state);
2607     if( setjmp(_break_jump) )
2608     {
2609 #if !defined(AE_NO_EXCEPTIONS)
2610         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2611 #else
2612         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2613         return;
2614 #endif
2615     }
2616     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2617     if( _xparams.flags!=0x0 )
2618         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2619     alglib_impl::rmatrixhessenbergunpackh(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
2620     alglib_impl::ae_state_clear(&_alglib_env_state);
2621     return;
2622 }
2623 
2624 /*************************************************************************
2625 Reduction of a symmetric matrix which is given by its higher or lower
2626 triangular part to a tridiagonal matrix using orthogonal similarity
2627 transformation: Q'*A*Q=T.
2628 
2629   ! COMMERCIAL EDITION OF ALGLIB:
2630   !
2631   ! Commercial Edition of ALGLIB includes following important improvements
2632   ! of this function:
2633   ! * high-performance native backend with same C# interface (C# version)
2634   ! * hardware vendor (Intel) implementations of linear algebra primitives
2635   !   (C++ and C# versions, x86/x64 platform)
2636   !
2637   ! We recommend you to read 'Working with commercial version' section  of
2638   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2639   ! related features provided by commercial edition of ALGLIB.
2640 
2641 Input parameters:
2642     A       -   matrix to be transformed
2643                 array with elements [0..N-1, 0..N-1].
2644     N       -   size of matrix A.
2645     IsUpper -   storage format. If IsUpper = True, then matrix A is given
2646                 by its upper triangle, and the lower triangle is not used
2647                 and not modified by the algorithm, and vice versa
2648                 if IsUpper = False.
2649 
2650 Output parameters:
2651     A       -   matrices T and Q in  compact form (see lower)
2652     Tau     -   array of factors which are forming matrices H(i)
2653                 array with elements [0..N-2].
2654     D       -   main diagonal of symmetric matrix T.
2655                 array with elements [0..N-1].
2656     E       -   secondary diagonal of symmetric matrix T.
2657                 array with elements [0..N-2].
2658 
2659 
2660   If IsUpper=True, the matrix Q is represented as a product of elementary
2661   reflectors
2662 
     Q = H(n-2) . . . H(1) H(0).
2664 
2665   Each H(i) has the form
2666 
2667      H(i) = I - tau * v * v'
2668 
2669   where tau is a real scalar, and v is a real vector with
2670   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
2671   A(0:i-1,i+1), and tau in TAU(i).
2672 
2673   If IsUpper=False, the matrix Q is represented as a product of elementary
2674   reflectors
2675 
     Q = H(0) H(1) . . . H(n-2).
2677 
2678   Each H(i) has the form
2679 
2680      H(i) = I - tau * v * v'
2681 
2682   where tau is a real scalar, and v is a real vector with
2683   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
2684   and tau in TAU(i).
2685 
2686   The contents of A on exit are illustrated by the following examples
2687   with n = 5:
2688 
2689   if UPLO = 'U':                       if UPLO = 'L':
2690 
2691     (  d   e   v1  v2  v3 )              (  d                  )
2692     (      d   e   v2  v3 )              (  e   d              )
2693     (          d   e   v3 )              (  v0  e   d          )
2694     (              d   e  )              (  v0  v1  e   d      )
2695     (                  d  )              (  v0  v1  v2  e   d  )
2696 
2697   where d and e denote diagonal and off-diagonal elements of T, and vi
2698   denotes an element of the vector defining H(i).
2699 
2700   -- LAPACK routine (version 3.0) --
2701      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2702      Courant Institute, Argonne National Lab, and Rice University
2703      October 31, 1992
2704 *************************************************************************/
smatrixtd(real_2d_array & a,const ae_int_t n,const bool isupper,real_1d_array & tau,real_1d_array & d,real_1d_array & e,const xparams _xparams)2705 void smatrixtd(real_2d_array &a, const ae_int_t n, const bool isupper, real_1d_array &tau, real_1d_array &d, real_1d_array &e, const xparams _xparams)
2706 {
2707     jmp_buf _break_jump;
2708     alglib_impl::ae_state _alglib_env_state;
2709     alglib_impl::ae_state_init(&_alglib_env_state);
2710     if( setjmp(_break_jump) )
2711     {
2712 #if !defined(AE_NO_EXCEPTIONS)
2713         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2714 #else
2715         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2716         return;
2717 #endif
2718     }
2719     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2720     if( _xparams.flags!=0x0 )
2721         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2722     alglib_impl::smatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
2723     alglib_impl::ae_state_clear(&_alglib_env_state);
2724     return;
2725 }
2726 
2727 /*************************************************************************
2728 Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
2729 form.
2730 
2731   ! COMMERCIAL EDITION OF ALGLIB:
2732   !
2733   ! Commercial Edition of ALGLIB includes following important improvements
2734   ! of this function:
2735   ! * high-performance native backend with same C# interface (C# version)
2736   ! * hardware vendor (Intel) implementations of linear algebra primitives
2737   !   (C++ and C# versions, x86/x64 platform)
2738   !
2739   ! We recommend you to read 'Working with commercial version' section  of
2740   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2741   ! related features provided by commercial edition of ALGLIB.
2742 
2743 Input parameters:
2744     A       -   the result of a SMatrixTD subroutine
2745     N       -   size of matrix A.
2746     IsUpper -   storage format (a parameter of SMatrixTD subroutine)
2747     Tau     -   the result of a SMatrixTD subroutine
2748 
2749 Output parameters:
2750     Q       -   transformation matrix.
2751                 array with elements [0..N-1, 0..N-1].
2752 
2753   -- ALGLIB --
2754      Copyright 2005-2010 by Bochkanov Sergey
2755 *************************************************************************/
smatrixtdunpackq(const real_2d_array & a,const ae_int_t n,const bool isupper,const real_1d_array & tau,real_2d_array & q,const xparams _xparams)2756 void smatrixtdunpackq(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &tau, real_2d_array &q, const xparams _xparams)
2757 {
2758     jmp_buf _break_jump;
2759     alglib_impl::ae_state _alglib_env_state;
2760     alglib_impl::ae_state_init(&_alglib_env_state);
2761     if( setjmp(_break_jump) )
2762     {
2763 #if !defined(AE_NO_EXCEPTIONS)
2764         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2765 #else
2766         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2767         return;
2768 #endif
2769     }
2770     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2771     if( _xparams.flags!=0x0 )
2772         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2773     alglib_impl::smatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2774     alglib_impl::ae_state_clear(&_alglib_env_state);
2775     return;
2776 }
2777 
2778 /*************************************************************************
2779 Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
2780 triangular part to a real  tridiagonal  matrix  using  unitary  similarity
2781 transformation: Q'*A*Q = T.
2782 
2783   ! COMMERCIAL EDITION OF ALGLIB:
2784   !
2785   ! Commercial Edition of ALGLIB includes following important improvements
2786   ! of this function:
2787   ! * high-performance native backend with same C# interface (C# version)
2788   ! * hardware vendor (Intel) implementations of linear algebra primitives
2789   !   (C++ and C# versions, x86/x64 platform)
2790   !
2791   ! We recommend you to read 'Working with commercial version' section  of
2792   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2793   ! related features provided by commercial edition of ALGLIB.
2794 
2795 Input parameters:
2796     A       -   matrix to be transformed
2797                 array with elements [0..N-1, 0..N-1].
2798     N       -   size of matrix A.
2799     IsUpper -   storage format. If IsUpper = True, then matrix A is  given
2800                 by its upper triangle, and the lower triangle is not  used
2801                 and not modified by the algorithm, and vice versa
2802                 if IsUpper = False.
2803 
2804 Output parameters:
2805     A       -   matrices T and Q in  compact form (see lower)
2806     Tau     -   array of factors which are forming matrices H(i)
2807                 array with elements [0..N-2].
2808     D       -   main diagonal of real symmetric matrix T.
2809                 array with elements [0..N-1].
2810     E       -   secondary diagonal of real symmetric matrix T.
2811                 array with elements [0..N-2].
2812 
2813 
2814   If IsUpper=True, the matrix Q is represented as a product of elementary
2815   reflectors
2816 
     Q = H(n-2) . . . H(1) H(0).
2818 
2819   Each H(i) has the form
2820 
2821      H(i) = I - tau * v * v'
2822 
2823   where tau is a complex scalar, and v is a complex vector with
2824   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
2825   A(0:i-1,i+1), and tau in TAU(i).
2826 
2827   If IsUpper=False, the matrix Q is represented as a product of elementary
2828   reflectors
2829 
     Q = H(0) H(1) . . . H(n-2).
2831 
2832   Each H(i) has the form
2833 
2834      H(i) = I - tau * v * v'
2835 
2836   where tau is a complex scalar, and v is a complex vector with
2837   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
2838   and tau in TAU(i).
2839 
2840   The contents of A on exit are illustrated by the following examples
2841   with n = 5:
2842 
2843   if UPLO = 'U':                       if UPLO = 'L':
2844 
2845     (  d   e   v1  v2  v3 )              (  d                  )
2846     (      d   e   v2  v3 )              (  e   d              )
2847     (          d   e   v3 )              (  v0  e   d          )
2848     (              d   e  )              (  v0  v1  e   d      )
2849     (                  d  )              (  v0  v1  v2  e   d  )
2850 
2851 where d and e denote diagonal and off-diagonal elements of T, and vi
2852 denotes an element of the vector defining H(i).
2853 
2854   -- LAPACK routine (version 3.0) --
2855      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2856      Courant Institute, Argonne National Lab, and Rice University
2857      October 31, 1992
2858 *************************************************************************/
hmatrixtd(complex_2d_array & a,const ae_int_t n,const bool isupper,complex_1d_array & tau,real_1d_array & d,real_1d_array & e,const xparams _xparams)2859 void hmatrixtd(complex_2d_array &a, const ae_int_t n, const bool isupper, complex_1d_array &tau, real_1d_array &d, real_1d_array &e, const xparams _xparams)
2860 {
2861     jmp_buf _break_jump;
2862     alglib_impl::ae_state _alglib_env_state;
2863     alglib_impl::ae_state_init(&_alglib_env_state);
2864     if( setjmp(_break_jump) )
2865     {
2866 #if !defined(AE_NO_EXCEPTIONS)
2867         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2868 #else
2869         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2870         return;
2871 #endif
2872     }
2873     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2874     if( _xparams.flags!=0x0 )
2875         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2876     alglib_impl::hmatrixtd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), &_alglib_env_state);
2877     alglib_impl::ae_state_clear(&_alglib_env_state);
2878     return;
2879 }
2880 
2881 /*************************************************************************
2882 Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
2883 form.
2884 
2885   ! COMMERCIAL EDITION OF ALGLIB:
2886   !
2887   ! Commercial Edition of ALGLIB includes following important improvements
2888   ! of this function:
2889   ! * high-performance native backend with same C# interface (C# version)
2890   ! * hardware vendor (Intel) implementations of linear algebra primitives
2891   !   (C++ and C# versions, x86/x64 platform)
2892   !
2893   ! We recommend you to read 'Working with commercial version' section  of
2894   ! ALGLIB Reference Manual in order to find out how to  use  performance-
2895   ! related features provided by commercial edition of ALGLIB.
2896 
2897 Input parameters:
2898     A       -   the result of a HMatrixTD subroutine
2899     N       -   size of matrix A.
2900     IsUpper -   storage format (a parameter of HMatrixTD subroutine)
2901     Tau     -   the result of a HMatrixTD subroutine
2902 
2903 Output parameters:
2904     Q       -   transformation matrix.
2905                 array with elements [0..N-1, 0..N-1].
2906 
2907   -- ALGLIB --
2908      Copyright 2005-2010 by Bochkanov Sergey
2909 *************************************************************************/
hmatrixtdunpackq(const complex_2d_array & a,const ae_int_t n,const bool isupper,const complex_1d_array & tau,complex_2d_array & q,const xparams _xparams)2910 void hmatrixtdunpackq(const complex_2d_array &a, const ae_int_t n, const bool isupper, const complex_1d_array &tau, complex_2d_array &q, const xparams _xparams)
2911 {
2912     jmp_buf _break_jump;
2913     alglib_impl::ae_state _alglib_env_state;
2914     alglib_impl::ae_state_init(&_alglib_env_state);
2915     if( setjmp(_break_jump) )
2916     {
2917 #if !defined(AE_NO_EXCEPTIONS)
2918         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2919 #else
2920         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2921         return;
2922 #endif
2923     }
2924     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2925     if( _xparams.flags!=0x0 )
2926         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2927     alglib_impl::hmatrixtdunpackq(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(tau.c_ptr()), const_cast<alglib_impl::ae_matrix*>(q.c_ptr()), &_alglib_env_state);
2928     alglib_impl::ae_state_clear(&_alglib_env_state);
2929     return;
2930 }
2931 #endif
2932 
2933 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
2934 /*************************************************************************
2935 Generation of a random uniformly distributed (Haar) orthogonal matrix
2936 
2937 INPUT PARAMETERS:
2938     N   -   matrix size, N>=1
2939 
2940 OUTPUT PARAMETERS:
2941     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
2942 
2943 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
2944       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
2945       Application to Condition Estimators".
2946 
2947       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
2948       * takes an NxN one
2949       * takes uniformly distributed unit vector of dimension N+1.
2950       * constructs a Householder reflection from the vector, then applies
2951         it to the smaller matrix (embedded in the larger size with a 1 at
2952         the bottom right corner).
2953 
2954   -- ALGLIB routine --
2955      04.12.2009
2956      Bochkanov Sergey
2957 *************************************************************************/
rmatrixrndorthogonal(const ae_int_t n,real_2d_array & a,const xparams _xparams)2958 void rmatrixrndorthogonal(const ae_int_t n, real_2d_array &a, const xparams _xparams)
2959 {
2960     jmp_buf _break_jump;
2961     alglib_impl::ae_state _alglib_env_state;
2962     alglib_impl::ae_state_init(&_alglib_env_state);
2963     if( setjmp(_break_jump) )
2964     {
2965 #if !defined(AE_NO_EXCEPTIONS)
2966         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2967 #else
2968         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2969         return;
2970 #endif
2971     }
2972     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2973     if( _xparams.flags!=0x0 )
2974         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2975     alglib_impl::rmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
2976     alglib_impl::ae_state_clear(&_alglib_env_state);
2977     return;
2978 }
2979 
2980 /*************************************************************************
2981 Generation of random NxN matrix with given condition number and norm2(A)=1
2982 
2983 INPUT PARAMETERS:
2984     N   -   matrix size
2985     C   -   condition number (in 2-norm)
2986 
2987 OUTPUT PARAMETERS:
2988     A   -   random matrix with norm2(A)=1 and cond(A)=C
2989 
2990   -- ALGLIB routine --
2991      04.12.2009
2992      Bochkanov Sergey
2993 *************************************************************************/
rmatrixrndcond(const ae_int_t n,const double c,real_2d_array & a,const xparams _xparams)2994 void rmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a, const xparams _xparams)
2995 {
2996     jmp_buf _break_jump;
2997     alglib_impl::ae_state _alglib_env_state;
2998     alglib_impl::ae_state_init(&_alglib_env_state);
2999     if( setjmp(_break_jump) )
3000     {
3001 #if !defined(AE_NO_EXCEPTIONS)
3002         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3003 #else
3004         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3005         return;
3006 #endif
3007     }
3008     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3009     if( _xparams.flags!=0x0 )
3010         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3011     alglib_impl::rmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3012     alglib_impl::ae_state_clear(&_alglib_env_state);
3013     return;
3014 }
3015 
3016 /*************************************************************************
3017 Generation of a random Haar distributed orthogonal complex matrix
3018 
3019 INPUT PARAMETERS:
3020     N   -   matrix size, N>=1
3021 
3022 OUTPUT PARAMETERS:
3023     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
3024 
3025 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
3026       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
3027       Application to Condition Estimators".
3028 
3029       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
3030       * takes an NxN one
3031       * takes uniformly distributed unit vector of dimension N+1.
3032       * constructs a Householder reflection from the vector, then applies
3033         it to the smaller matrix (embedded in the larger size with a 1 at
3034         the bottom right corner).
3035 
3036   -- ALGLIB routine --
3037      04.12.2009
3038      Bochkanov Sergey
3039 *************************************************************************/
cmatrixrndorthogonal(const ae_int_t n,complex_2d_array & a,const xparams _xparams)3040 void cmatrixrndorthogonal(const ae_int_t n, complex_2d_array &a, const xparams _xparams)
3041 {
3042     jmp_buf _break_jump;
3043     alglib_impl::ae_state _alglib_env_state;
3044     alglib_impl::ae_state_init(&_alglib_env_state);
3045     if( setjmp(_break_jump) )
3046     {
3047 #if !defined(AE_NO_EXCEPTIONS)
3048         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3049 #else
3050         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3051         return;
3052 #endif
3053     }
3054     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3055     if( _xparams.flags!=0x0 )
3056         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3057     alglib_impl::cmatrixrndorthogonal(n, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3058     alglib_impl::ae_state_clear(&_alglib_env_state);
3059     return;
3060 }
3061 
3062 /*************************************************************************
3063 Generation of random NxN complex matrix with given condition number C and
3064 norm2(A)=1
3065 
3066 INPUT PARAMETERS:
3067     N   -   matrix size
3068     C   -   condition number (in 2-norm)
3069 
3070 OUTPUT PARAMETERS:
3071     A   -   random matrix with norm2(A)=1 and cond(A)=C
3072 
3073   -- ALGLIB routine --
3074      04.12.2009
3075      Bochkanov Sergey
3076 *************************************************************************/
cmatrixrndcond(const ae_int_t n,const double c,complex_2d_array & a,const xparams _xparams)3077 void cmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a, const xparams _xparams)
3078 {
3079     jmp_buf _break_jump;
3080     alglib_impl::ae_state _alglib_env_state;
3081     alglib_impl::ae_state_init(&_alglib_env_state);
3082     if( setjmp(_break_jump) )
3083     {
3084 #if !defined(AE_NO_EXCEPTIONS)
3085         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3086 #else
3087         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3088         return;
3089 #endif
3090     }
3091     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3092     if( _xparams.flags!=0x0 )
3093         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3094     alglib_impl::cmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3095     alglib_impl::ae_state_clear(&_alglib_env_state);
3096     return;
3097 }
3098 
3099 /*************************************************************************
3100 Generation of random NxN symmetric matrix with given condition number  and
3101 norm2(A)=1
3102 
3103 INPUT PARAMETERS:
3104     N   -   matrix size
3105     C   -   condition number (in 2-norm)
3106 
3107 OUTPUT PARAMETERS:
3108     A   -   random matrix with norm2(A)=1 and cond(A)=C
3109 
3110   -- ALGLIB routine --
3111      04.12.2009
3112      Bochkanov Sergey
3113 *************************************************************************/
smatrixrndcond(const ae_int_t n,const double c,real_2d_array & a,const xparams _xparams)3114 void smatrixrndcond(const ae_int_t n, const double c, real_2d_array &a, const xparams _xparams)
3115 {
3116     jmp_buf _break_jump;
3117     alglib_impl::ae_state _alglib_env_state;
3118     alglib_impl::ae_state_init(&_alglib_env_state);
3119     if( setjmp(_break_jump) )
3120     {
3121 #if !defined(AE_NO_EXCEPTIONS)
3122         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3123 #else
3124         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3125         return;
3126 #endif
3127     }
3128     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3129     if( _xparams.flags!=0x0 )
3130         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3131     alglib_impl::smatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3132     alglib_impl::ae_state_clear(&_alglib_env_state);
3133     return;
3134 }
3135 
3136 /*************************************************************************
3137 Generation of random NxN symmetric positive definite matrix with given
3138 condition number and norm2(A)=1
3139 
3140 INPUT PARAMETERS:
3141     N   -   matrix size
3142     C   -   condition number (in 2-norm)
3143 
3144 OUTPUT PARAMETERS:
3145     A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
3146 
3147   -- ALGLIB routine --
3148      04.12.2009
3149      Bochkanov Sergey
3150 *************************************************************************/
spdmatrixrndcond(const ae_int_t n,const double c,real_2d_array & a,const xparams _xparams)3151 void spdmatrixrndcond(const ae_int_t n, const double c, real_2d_array &a, const xparams _xparams)
3152 {
3153     jmp_buf _break_jump;
3154     alglib_impl::ae_state _alglib_env_state;
3155     alglib_impl::ae_state_init(&_alglib_env_state);
3156     if( setjmp(_break_jump) )
3157     {
3158 #if !defined(AE_NO_EXCEPTIONS)
3159         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3160 #else
3161         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3162         return;
3163 #endif
3164     }
3165     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3166     if( _xparams.flags!=0x0 )
3167         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3168     alglib_impl::spdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3169     alglib_impl::ae_state_clear(&_alglib_env_state);
3170     return;
3171 }
3172 
3173 /*************************************************************************
3174 Generation of random NxN Hermitian matrix with given condition number  and
3175 norm2(A)=1
3176 
3177 INPUT PARAMETERS:
3178     N   -   matrix size
3179     C   -   condition number (in 2-norm)
3180 
3181 OUTPUT PARAMETERS:
3182     A   -   random matrix with norm2(A)=1 and cond(A)=C
3183 
3184   -- ALGLIB routine --
3185      04.12.2009
3186      Bochkanov Sergey
3187 *************************************************************************/
hmatrixrndcond(const ae_int_t n,const double c,complex_2d_array & a,const xparams _xparams)3188 void hmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a, const xparams _xparams)
3189 {
3190     jmp_buf _break_jump;
3191     alglib_impl::ae_state _alglib_env_state;
3192     alglib_impl::ae_state_init(&_alglib_env_state);
3193     if( setjmp(_break_jump) )
3194     {
3195 #if !defined(AE_NO_EXCEPTIONS)
3196         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3197 #else
3198         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3199         return;
3200 #endif
3201     }
3202     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3203     if( _xparams.flags!=0x0 )
3204         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3205     alglib_impl::hmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3206     alglib_impl::ae_state_clear(&_alglib_env_state);
3207     return;
3208 }
3209 
3210 /*************************************************************************
3211 Generation of random NxN Hermitian positive definite matrix with given
3212 condition number and norm2(A)=1
3213 
3214 INPUT PARAMETERS:
3215     N   -   matrix size
3216     C   -   condition number (in 2-norm)
3217 
3218 OUTPUT PARAMETERS:
3219     A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
3220 
3221   -- ALGLIB routine --
3222      04.12.2009
3223      Bochkanov Sergey
3224 *************************************************************************/
hpdmatrixrndcond(const ae_int_t n,const double c,complex_2d_array & a,const xparams _xparams)3225 void hpdmatrixrndcond(const ae_int_t n, const double c, complex_2d_array &a, const xparams _xparams)
3226 {
3227     jmp_buf _break_jump;
3228     alglib_impl::ae_state _alglib_env_state;
3229     alglib_impl::ae_state_init(&_alglib_env_state);
3230     if( setjmp(_break_jump) )
3231     {
3232 #if !defined(AE_NO_EXCEPTIONS)
3233         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3234 #else
3235         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3236         return;
3237 #endif
3238     }
3239     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3240     if( _xparams.flags!=0x0 )
3241         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3242     alglib_impl::hpdmatrixrndcond(n, c, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &_alglib_env_state);
3243     alglib_impl::ae_state_clear(&_alglib_env_state);
3244     return;
3245 }
3246 
3247 /*************************************************************************
3248 Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
3249 
3250 INPUT PARAMETERS:
3251     A   -   matrix, array[0..M-1, 0..N-1]
3252     M, N-   matrix size
3253 
3254 OUTPUT PARAMETERS:
3255     A   -   A*Q, where Q is random NxN orthogonal matrix
3256 
3257   -- ALGLIB routine --
3258      04.12.2009
3259      Bochkanov Sergey
3260 *************************************************************************/
rmatrixrndorthogonalfromtheright(real_2d_array & a,const ae_int_t m,const ae_int_t n,const xparams _xparams)3261 void rmatrixrndorthogonalfromtheright(real_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
3262 {
3263     jmp_buf _break_jump;
3264     alglib_impl::ae_state _alglib_env_state;
3265     alglib_impl::ae_state_init(&_alglib_env_state);
3266     if( setjmp(_break_jump) )
3267     {
3268 #if !defined(AE_NO_EXCEPTIONS)
3269         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3270 #else
3271         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3272         return;
3273 #endif
3274     }
3275     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3276     if( _xparams.flags!=0x0 )
3277         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3278     alglib_impl::rmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
3279     alglib_impl::ae_state_clear(&_alglib_env_state);
3280     return;
3281 }
3282 
3283 /*************************************************************************
3284 Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
3285 
3286 INPUT PARAMETERS:
3287     A   -   matrix, array[0..M-1, 0..N-1]
3288     M, N-   matrix size
3289 
3290 OUTPUT PARAMETERS:
3291     A   -   Q*A, where Q is random MxM orthogonal matrix
3292 
3293   -- ALGLIB routine --
3294      04.12.2009
3295      Bochkanov Sergey
3296 *************************************************************************/
rmatrixrndorthogonalfromtheleft(real_2d_array & a,const ae_int_t m,const ae_int_t n,const xparams _xparams)3297 void rmatrixrndorthogonalfromtheleft(real_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
3298 {
3299     jmp_buf _break_jump;
3300     alglib_impl::ae_state _alglib_env_state;
3301     alglib_impl::ae_state_init(&_alglib_env_state);
3302     if( setjmp(_break_jump) )
3303     {
3304 #if !defined(AE_NO_EXCEPTIONS)
3305         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3306 #else
3307         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3308         return;
3309 #endif
3310     }
3311     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3312     if( _xparams.flags!=0x0 )
3313         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3314     alglib_impl::rmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
3315     alglib_impl::ae_state_clear(&_alglib_env_state);
3316     return;
3317 }
3318 
3319 /*************************************************************************
3320 Multiplication of MxN complex matrix by NxN random Haar distributed
3321 complex orthogonal matrix
3322 
3323 INPUT PARAMETERS:
3324     A   -   matrix, array[0..M-1, 0..N-1]
3325     M, N-   matrix size
3326 
3327 OUTPUT PARAMETERS:
3328     A   -   A*Q, where Q is random NxN orthogonal matrix
3329 
3330   -- ALGLIB routine --
3331      04.12.2009
3332      Bochkanov Sergey
3333 *************************************************************************/
// C++ wrapper around the C-core implementation: multiplies complex A by a
// random NxN unitary matrix from the right (A := A*Q). Errors raised by the
// C core via longjmp() are translated to a C++ exception (or to the global
// error flag under AE_NO_EXCEPTIONS).
void cmatrixrndorthogonalfromtheright(complex_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // nonzero return => we arrived here through longjmp() from the C core
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::cmatrixrndorthogonalfromtheright(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3355 
3356 /*************************************************************************
3357 Multiplication of MxN complex matrix by MxM random Haar distributed
3358 complex orthogonal matrix
3359 
3360 INPUT PARAMETERS:
3361     A   -   matrix, array[0..M-1, 0..N-1]
3362     M, N-   matrix size
3363 
3364 OUTPUT PARAMETERS:
3365     A   -   Q*A, where Q is random MxM orthogonal matrix
3366 
3367   -- ALGLIB routine --
3368      04.12.2009
3369      Bochkanov Sergey
3370 *************************************************************************/
// C++ wrapper around the C-core implementation: multiplies complex A by a
// random MxM unitary matrix from the left (A := Q*A). longjmp()-style errors
// from the C core become C++ exceptions (or set the error flag under
// AE_NO_EXCEPTIONS).
void cmatrixrndorthogonalfromtheleft(complex_2d_array &a, const ae_int_t m, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::cmatrixrndorthogonalfromtheleft(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3392 
3393 /*************************************************************************
3394 Symmetric multiplication of NxN matrix by random Haar distributed
3395 orthogonal  matrix
3396 
3397 INPUT PARAMETERS:
3398     A   -   matrix, array[0..N-1, 0..N-1]
3399     N   -   matrix size
3400 
3401 OUTPUT PARAMETERS:
3402     A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
3403 
3404   -- ALGLIB routine --
3405      04.12.2009
3406      Bochkanov Sergey
3407 *************************************************************************/
// C++ wrapper around the C-core implementation: symmetric similarity
// transform A := Q'*A*Q with a random NxN orthogonal Q. Errors raised by
// the C core via longjmp() are rethrown as C++ exceptions (or recorded in
// the error flag under AE_NO_EXCEPTIONS).
void smatrixrndmultiply(real_2d_array &a, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::smatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3429 
3430 /*************************************************************************
3431 Hermitian multiplication of NxN matrix by random Haar distributed
3432 complex orthogonal matrix
3433 
3434 INPUT PARAMETERS:
3435     A   -   matrix, array[0..N-1, 0..N-1]
3436     N   -   matrix size
3437 
3438 OUTPUT PARAMETERS:
3439     A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
3440 
3441   -- ALGLIB routine --
3442      04.12.2009
3443      Bochkanov Sergey
3444 *************************************************************************/
// C++ wrapper around the C-core implementation: Hermitian similarity
// transform A := Q^H*A*Q with a random NxN unitary Q. longjmp()-style
// errors from the C core become C++ exceptions (or set the error flag
// under AE_NO_EXCEPTIONS).
void hmatrixrndmultiply(complex_2d_array &a, const ae_int_t n, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::hmatrixrndmultiply(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3466 #endif
3467 
3468 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
3469 /*************************************************************************
3470 Sparse matrix structure.
3471 
3472 You should use ALGLIB functions to work with sparse matrix. Never  try  to
3473 access its fields directly!
3474 
3475 NOTES ON THE SPARSE STORAGE FORMATS
3476 
3477 Sparse matrices can be stored using several formats:
3478 * Hash-Table representation
3479 * Compressed Row Storage (CRS)
3480 * Skyline matrix storage (SKS)
3481 
3482 Each of the formats has benefits and drawbacks:
3483 * Hash-table is good for dynamic operations (insertion of new elements),
3484   but does not support linear algebra operations
3485 * CRS is good for operations like matrix-vector or matrix-matrix products,
3486   but its initialization is less convenient - you have to tell row   sizes
3487   at the initialization, and you have to fill  matrix  only  row  by  row,
3488   from left to right.
3489 * SKS is a special format which is used to store triangular  factors  from
3490   Cholesky factorization. It does not support  dynamic  modification,  and
3491   support for linear algebra operations is very limited.
3492 
3493 Tables below outline information about these formats:
3494 
3495     OPERATIONS WITH MATRIX      HASH        CRS         SKS
3496     creation                    +           +           +
3497     SparseGet                   +           +           +
3498     SparseExists                +           +           +
3499     SparseRewriteExisting       +           +           +
3500     SparseSet                   +           +           +
3501     SparseAdd                   +
3502     SparseGetRow                            +           +
3503     SparseGetCompressedRow                  +           +
3504     sparse-dense linear algebra             +           +
3505 *************************************************************************/
// Default constructor: allocates and zero-initializes the wrapped C-core
// sparsematrix structure. If the C core longjmp()s during allocation or
// initialization, any partially-constructed state is destroyed and freed
// before the error is rethrown as a C++ exception (or stored in the error
// flag when AE_NO_EXCEPTIONS is defined).
_sparsematrix_owner::_sparsematrix_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsematrix_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before ae_malloc so the error path above is safe
    p_struct = NULL;
    p_struct = (alglib_impl::sparsematrix*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsematrix), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsematrix));
    alglib_impl::_sparsematrix_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
3534 
// Copy constructor: deep-copies the C-core structure owned by rhs. Requires
// rhs to be initialized (asserted). On failure any partially-constructed
// state is destroyed before the error is propagated as a C++ exception (or
// stored in the error flag under AE_NO_EXCEPTIONS).
_sparsematrix_owner::_sparsematrix_owner(const _sparsematrix_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsematrix_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsematrix copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::sparsematrix*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsematrix), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsematrix));
    alglib_impl::_sparsematrix_init_copy(p_struct, const_cast<alglib_impl::sparsematrix*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
3564 
// Copy assignment: destroys the current contents and deep-copies from rhs.
// Self-assignment is a no-op. Both sides must already be initialized
// (asserted). Errors are rethrown as C++ exceptions (or stored in the
// error flag under AE_NO_EXCEPTIONS).
_sparsematrix_owner& _sparsematrix_owner::operator=(const _sparsematrix_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: sparsematrix assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsematrix assignment constructor failure (source is not initialized)", &_state);
    // destroy-then-init-copy; p_struct itself is reused, only contents change
    alglib_impl::_sparsematrix_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::sparsematrix));
    alglib_impl::_sparsematrix_init_copy(p_struct, const_cast<alglib_impl::sparsematrix*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
3591 
~_sparsematrix_owner()3592 _sparsematrix_owner::~_sparsematrix_owner()
3593 {
3594     if( p_struct!=NULL )
3595     {
3596         alglib_impl::_sparsematrix_destroy(p_struct);
3597         ae_free(p_struct);
3598     }
3599 }
3600 
// Returns the raw (non-owning) pointer to the wrapped C-core structure.
alglib_impl::sparsematrix* _sparsematrix_owner::c_ptr()
{
    return p_struct;
}
3605 
c_ptr() const3606 alglib_impl::sparsematrix* _sparsematrix_owner::c_ptr() const
3607 {
3608     return const_cast<alglib_impl::sparsematrix*>(p_struct);
3609 }
// Default constructor: delegates allocation/initialization to the owner base.
sparsematrix::sparsematrix() : _sparsematrix_owner()
{
}
3613 
// Copy constructor: delegates the deep copy to the owner base.
sparsematrix::sparsematrix(const sparsematrix &rhs):_sparsematrix_owner(rhs)
{
}
3617 
// Copy assignment: deep copying is delegated to the owner base class;
// self-assignment is a no-op.
sparsematrix& sparsematrix::operator=(const sparsematrix &rhs)
{
    if( this!=&rhs )
        _sparsematrix_owner::operator=(rhs);
    return *this;
}
3625 
// Destructor: cleanup is handled by the owner base destructor.
sparsematrix::~sparsematrix()
{
}
3629 
3630 
3631 /*************************************************************************
3632 Temporary buffers for sparse matrix operations.
3633 
3634 You should pass an instance of this structure to factorization  functions.
3635 It allows to reuse memory during repeated sparse  factorizations.  You  do
3636 not have to call some initialization function - simply passing an instance
3637 to factorization function is enough.
3638 *************************************************************************/
// Default constructor: allocates and zero-initializes the wrapped C-core
// sparsebuffers structure. On a longjmp() from the C core, any partially-
// constructed state is destroyed and freed before the error is rethrown as
// a C++ exception (or stored in the error flag under AE_NO_EXCEPTIONS).
_sparsebuffers_owner::_sparsebuffers_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsebuffers_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before ae_malloc so the error path above is safe
    p_struct = NULL;
    p_struct = (alglib_impl::sparsebuffers*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsebuffers), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsebuffers));
    alglib_impl::_sparsebuffers_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
3667 
// Copy constructor: deep-copies the C-core structure owned by rhs. Requires
// rhs to be initialized (asserted). On failure any partially-constructed
// state is destroyed before the error is propagated.
_sparsebuffers_owner::_sparsebuffers_owner(const _sparsebuffers_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // error path: release whatever was allocated before the failure
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsebuffers_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsebuffers copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::sparsebuffers*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsebuffers), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsebuffers));
    alglib_impl::_sparsebuffers_init_copy(p_struct, const_cast<alglib_impl::sparsebuffers*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
3697 
// Copy assignment: destroys the current contents and deep-copies from rhs.
// Self-assignment is a no-op. Both sides must already be initialized
// (asserted).
_sparsebuffers_owner& _sparsebuffers_owner::operator=(const _sparsebuffers_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: sparsebuffers assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsebuffers assignment constructor failure (source is not initialized)", &_state);
    // destroy-then-init-copy; p_struct itself is reused, only contents change
    alglib_impl::_sparsebuffers_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::sparsebuffers));
    alglib_impl::_sparsebuffers_init_copy(p_struct, const_cast<alglib_impl::sparsebuffers*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
3724 
~_sparsebuffers_owner()3725 _sparsebuffers_owner::~_sparsebuffers_owner()
3726 {
3727     if( p_struct!=NULL )
3728     {
3729         alglib_impl::_sparsebuffers_destroy(p_struct);
3730         ae_free(p_struct);
3731     }
3732 }
3733 
// Returns the raw (non-owning) pointer to the wrapped C-core structure.
alglib_impl::sparsebuffers* _sparsebuffers_owner::c_ptr()
{
    return p_struct;
}
3738 
c_ptr() const3739 alglib_impl::sparsebuffers* _sparsebuffers_owner::c_ptr() const
3740 {
3741     return const_cast<alglib_impl::sparsebuffers*>(p_struct);
3742 }
// Default constructor: delegates allocation/initialization to the owner base.
sparsebuffers::sparsebuffers() : _sparsebuffers_owner()
{
}
3746 
// Copy constructor: delegates the deep copy to the owner base.
sparsebuffers::sparsebuffers(const sparsebuffers &rhs):_sparsebuffers_owner(rhs)
{
}
3750 
// Copy assignment: deep copying is delegated to the owner base class;
// self-assignment is a no-op.
sparsebuffers& sparsebuffers::operator=(const sparsebuffers &rhs)
{
    if( this!=&rhs )
        _sparsebuffers_owner::operator=(rhs);
    return *this;
}
3758 
// Destructor: cleanup is handled by the owner base destructor.
sparsebuffers::~sparsebuffers()
{
}
3762 
3763 
3764 /*************************************************************************
3765 This function serializes data structure to string.
3766 
3767 Important properties of s_out:
3768 * it contains alphanumeric characters, dots, underscores, minus signs
3769 * these symbols are grouped into words, which are separated by spaces
3770   and Windows-style (CR+LF) newlines
3771 * although  serializer  uses  spaces and CR+LF as separators, you can
3772   replace any separator character by arbitrary combination of spaces,
3773   tabs, Windows or Unix newlines. It allows flexible reformatting  of
3774   the  string  in  case you want to include it into text or XML file.
3775   But you should not insert separators into the middle of the "words"
3776   nor you should change case of letters.
3777 * s_out can be freely moved between 32-bit and 64-bit systems, little
3778   and big endian machines, and so on. You can serialize structure  on
3779   32-bit machine and unserialize it on 64-bit one (or vice versa), or
3780   serialize  it  on  SPARC  and  unserialize  on  x86.  You  can also
3781   serialize  it  in  C++ version of ALGLIB and unserialize in C# one,
3782   and vice versa.
3783 *************************************************************************/
// Serializes a sparse matrix into a portable ASCII string. Two-pass
// protocol: first pass (sparsealloc) only counts the required size so the
// output string can be reserved once; second pass (sparseserialize) writes
// the data. The final size assertion guards against a mismatch between the
// two passes.
void sparseserialize(sparsematrix &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    // pass 1: size estimation
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::sparsealloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    // pass 2: actual serialization into the pre-reserved string
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::sparseserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3815 /*************************************************************************
3816 This function unserializes data structure from string.
3817 *************************************************************************/
// Restores a sparse matrix from its string serialization (the inverse of
// sparseserialize above). Errors raised by the C core via longjmp() are
// rethrown as C++ exceptions (or stored in the error flag under
// AE_NO_EXCEPTIONS).
void sparseunserialize(const std::string &s_in, sparsematrix &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::sparseunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3842 
3843 
3844 /*************************************************************************
3845 This function serializes data structure to C++ stream.
3846 
3847 Data stream generated by this function is same as  string  representation
3848 generated  by  string  version  of  serializer - alphanumeric characters,
3849 dots, underscores, minus signs, which are grouped into words separated by
3850 spaces and CR+LF.
3851 
3852 We recommend you to read comments on string version of serializer to find
3853 out more about serialization of ALGLIB objects.
3854 *************************************************************************/
// Serializes a sparse matrix into a C++ output stream. Uses the same
// two-pass serializer protocol as the string version; the size returned by
// the first pass is unused here because streams need no pre-reservation,
// but the serializer API still requires the query to be made.
void sparseserialize(sparsematrix &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::sparsealloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::sparseserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3882 /*************************************************************************
3883 This function unserializes data structure from stream.
3884 *************************************************************************/
// Restores a sparse matrix from a C++ input stream (the inverse of the
// stream version of sparseserialize). Errors from the C core are rethrown
// as C++ exceptions (or stored in the error flag under AE_NO_EXCEPTIONS).
void sparseunserialize(const std::istream &s_in, sparsematrix &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::sparseunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
3909 
3910 /*************************************************************************
3911 This function creates sparse matrix in a Hash-Table format.
3912 
3913 This function creates Hash-Table matrix, which can be  converted  to  CRS
3914 format after its initialization is over. Typical  usage  scenario  for  a
3915 sparse matrix is:
3916 1. creation in a Hash-Table format
3917 2. insertion of the matrix elements
3918 3. conversion to the CRS representation
3919 4. matrix is passed to some linear algebra algorithm
3920 
3921 Some  information  about  different matrix formats can be found below, in
3922 the "NOTES" section.
3923 
3924 INPUT PARAMETERS
3925     M           -   number of rows in a matrix, M>=1
3926     N           -   number of columns in a matrix, N>=1
3927     K           -   K>=0, expected number of non-zero elements in a matrix.
3928                     K can be inexact approximation, can be less than actual
3929                     number  of  elements  (table will grow when needed) or
3930                     even zero.
3931                     It is important to understand that although hash-table
3932                     may grow automatically, it is better to  provide  good
3933                     estimate of data size.
3934 
3935 OUTPUT PARAMETERS
3936     S           -   sparse M*N matrix in Hash-Table representation.
3937                     All elements of the matrix are zero.
3938 
3939 NOTE 1
3940 
3941 Hash-tables use memory inefficiently, and they have to keep  some  amount
3942 of the "spare memory" in order to have good performance. Hash  table  for
3943 matrix with K non-zero elements will  need  C*K*(8+2*sizeof(int))  bytes,
3944 where C is a small constant, about 1.5-2 in magnitude.
3945 
3946 CRS storage, from the other side, is  more  memory-efficient,  and  needs
3947 just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number  of  rows
3948 in a matrix.
3949 
3950 When you convert from the Hash-Table to CRS  representation, all unneeded
3951 memory will be freed.
3952 
3953 NOTE 2
3954 
3955 Comments of SparseMatrix structure outline  information  about  different
3956 sparse storage formats. We recommend you to read them before starting  to
3957 use ALGLIB sparse matrices.
3958 
3959 NOTE 3
3960 
3961 This function completely  overwrites S with new sparse matrix. Previously
3962 allocated storage is NOT reused. If you  want  to reuse already allocated
3963 memory, call SparseCreateBuf function.
3964 
3965   -- ALGLIB PROJECT --
3966      Copyright 14.10.2011 by Bochkanov Sergey
3967 *************************************************************************/
// C++ wrapper: creates an MxN sparse matrix in Hash-Table format with room
// for about K non-zero elements (the table grows as needed). Overwrites S.
// Errors from the C core are rethrown as C++ exceptions (or stored in the
// error flag under AE_NO_EXCEPTIONS).
void sparsecreate(const ae_int_t m, const ae_int_t n, const ae_int_t k, sparsematrix &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // error landing pad for the C core's longjmp()
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecreate(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
3989 
3990 /*************************************************************************
3991 This function creates sparse matrix in a Hash-Table format.
3992 
3993 This function creates Hash-Table matrix, which can be  converted  to  CRS
3994 format after its initialization is over. Typical  usage  scenario  for  a
3995 sparse matrix is:
3996 1. creation in a Hash-Table format
3997 2. insertion of the matrix elements
3998 3. conversion to the CRS representation
3999 4. matrix is passed to some linear algebra algorithm
4000 
4001 Some  information  about  different matrix formats can be found below, in
4002 the "NOTES" section.
4003 
4004 INPUT PARAMETERS
4005     M           -   number of rows in a matrix, M>=1
4006     N           -   number of columns in a matrix, N>=1
4007     K           -   K>=0, expected number of non-zero elements in a matrix.
4008                     K can be inexact approximation, can be less than actual
4009                     number  of  elements  (table will grow when needed) or
4010                     even zero.
4011                     It is important to understand that although hash-table
4012                     may grow automatically, it is better to  provide  good
4013                     estimate of data size.
4014 
4015 OUTPUT PARAMETERS
4016     S           -   sparse M*N matrix in Hash-Table representation.
4017                     All elements of the matrix are zero.
4018 
4019 NOTE 1
4020 
4021 Hash-tables use memory inefficiently, and they have to keep  some  amount
4022 of the "spare memory" in order to have good performance. Hash  table  for
4023 matrix with K non-zero elements will  need  C*K*(8+2*sizeof(int))  bytes,
4024 where C is a small constant, about 1.5-2 in magnitude.
4025 
4026 CRS storage, from the other side, is  more  memory-efficient,  and  needs
4027 just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number  of  rows
4028 in a matrix.
4029 
4030 When you convert from the Hash-Table to CRS  representation, all unneeded
4031 memory will be freed.
4032 
4033 NOTE 2
4034 
4035 Comments of SparseMatrix structure outline  information  about  different
4036 sparse storage formats. We recommend you to read them before starting  to
4037 use ALGLIB sparse matrices.
4038 
4039 NOTE 3
4040 
4041 This function completely  overwrites S with new sparse matrix. Previously
4042 allocated storage is NOT reused. If you  want  to reuse already allocated
4043 memory, call SparseCreateBuf function.
4044 
4045   -- ALGLIB PROJECT --
4046      Copyright 14.10.2011 by Bochkanov Sergey
4047 *************************************************************************/
4048 #if !defined(AE_NO_EXCEPTIONS)
// Convenience overload of sparsecreate() with the expected-nonzeros count K
// defaulted to 0 (the hash table simply grows on demand). Only compiled in
// exception-enabled builds (see the surrounding #if), so the error path
// always throws.
void sparsecreate(const ae_int_t m, const ae_int_t n, sparsematrix &s, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    ae_int_t k;

    k = 0;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecreate(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);

    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
4067 #endif
4068 
4069 /*************************************************************************
4070 This version of SparseCreate function creates sparse matrix in Hash-Table
4071 format, reusing previously allocated storage as much  as  possible.  Read
4072 comments for SparseCreate() for more information.
4073 
4074 INPUT PARAMETERS
4075     M           -   number of rows in a matrix, M>=1
4076     N           -   number of columns in a matrix, N>=1
4077     K           -   K>=0, expected number of non-zero elements in a matrix.
4078                     K can be inexact approximation, can be less than actual
4079                     number  of  elements  (table will grow when needed) or
4080                     even zero.
4081                     It is important to understand that although hash-table
4082                     may grow automatically, it is better to  provide  good
4083                     estimate of data size.
4084     S           -   SparseMatrix structure which MAY contain some  already
4085                     allocated storage.
4086 
4087 OUTPUT PARAMETERS
4088     S           -   sparse M*N matrix in Hash-Table representation.
4089                     All elements of the matrix are zero.
4090                     Previously allocated storage is reused, if  its  size
4091                     is compatible with expected number of non-zeros K.
4092 
4093   -- ALGLIB PROJECT --
4094      Copyright 14.01.2014 by Bochkanov Sergey
4095 *************************************************************************/
sparsecreatebuf(const ae_int_t m,const ae_int_t n,const ae_int_t k,const sparsematrix & s,const xparams _xparams)4096 void sparsecreatebuf(const ae_int_t m, const ae_int_t n, const ae_int_t k, const sparsematrix &s, const xparams _xparams)
4097 {
4098     jmp_buf _break_jump;
4099     alglib_impl::ae_state _alglib_env_state;
4100     alglib_impl::ae_state_init(&_alglib_env_state);
4101     if( setjmp(_break_jump) )
4102     {
4103 #if !defined(AE_NO_EXCEPTIONS)
4104         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4105 #else
4106         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4107         return;
4108 #endif
4109     }
4110     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4111     if( _xparams.flags!=0x0 )
4112         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4113     alglib_impl::sparsecreatebuf(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4114     alglib_impl::ae_state_clear(&_alglib_env_state);
4115     return;
4116 }
4117 
4118 /*************************************************************************
4119 This version of SparseCreate function creates sparse matrix in Hash-Table
4120 format, reusing previously allocated storage as much  as  possible.  Read
4121 comments for SparseCreate() for more information.
4122 
4123 INPUT PARAMETERS
4124     M           -   number of rows in a matrix, M>=1
4125     N           -   number of columns in a matrix, N>=1
4126     K           -   K>=0, expected number of non-zero elements in a matrix.
4127                     K can be inexact approximation, can be less than actual
4128                     number  of  elements  (table will grow when needed) or
                    even zero.
4130                     It is important to understand that although hash-table
4131                     may grow automatically, it is better to  provide  good
4132                     estimate of data size.
4133     S           -   SparseMatrix structure which MAY contain some  already
4134                     allocated storage.
4135 
4136 OUTPUT PARAMETERS
4137     S           -   sparse M*N matrix in Hash-Table representation.
4138                     All elements of the matrix are zero.
4139                     Previously allocated storage is reused, if  its  size
4140                     is compatible with expected number of non-zeros K.
4141 
4142   -- ALGLIB PROJECT --
4143      Copyright 14.01.2014 by Bochkanov Sergey
4144 *************************************************************************/
4145 #if !defined(AE_NO_EXCEPTIONS)
sparsecreatebuf(const ae_int_t m,const ae_int_t n,const sparsematrix & s,const xparams _xparams)4146 void sparsecreatebuf(const ae_int_t m, const ae_int_t n, const sparsematrix &s, const xparams _xparams)
4147 {
4148     jmp_buf _break_jump;
4149     alglib_impl::ae_state _alglib_env_state;
4150     ae_int_t k;
4151 
4152     k = 0;
4153     alglib_impl::ae_state_init(&_alglib_env_state);
4154     if( setjmp(_break_jump) )
4155         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4156     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4157     if( _xparams.flags!=0x0 )
4158         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4159     alglib_impl::sparsecreatebuf(m, n, k, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4160 
4161     alglib_impl::ae_state_clear(&_alglib_env_state);
4162     return;
4163 }
4164 #endif
4165 
4166 /*************************************************************************
4167 This function creates sparse matrix in a CRS format (expert function for
4168 situations when you are running out of memory).
4169 
4170 This function creates CRS matrix. Typical usage scenario for a CRS matrix
4171 is:
4172 1. creation (you have to tell number of non-zero elements at each row  at
4173    this moment)
4174 2. insertion of the matrix elements (row by row, from left to right)
4175 3. matrix is passed to some linear algebra algorithm
4176 
4177 This function is a memory-efficient alternative to SparseCreate(), but it
4178 is more complex because it requires you to know in advance how large your
4179 matrix is. Some  information about  different matrix formats can be found
4180 in comments on SparseMatrix structure.  We recommend  you  to  read  them
4181 before starting to use ALGLIB sparse matrices..
4182 
4183 INPUT PARAMETERS
4184     M           -   number of rows in a matrix, M>=1
4185     N           -   number of columns in a matrix, N>=1
4186     NER         -   number of elements at each row, array[M], NER[I]>=0
4187 
4188 OUTPUT PARAMETERS
4189     S           -   sparse M*N matrix in CRS representation.
4190                     You have to fill ALL non-zero elements by calling
4191                     SparseSet() BEFORE you try to use this matrix.
4192 
4193 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
4194       Previously allocated storage is NOT reused. If you  want  to  reuse
4195       already allocated memory, call SparseCreateCRSBuf function.
4196 
4197   -- ALGLIB PROJECT --
4198      Copyright 14.10.2011 by Bochkanov Sergey
4199 *************************************************************************/
sparsecreatecrs(const ae_int_t m,const ae_int_t n,const integer_1d_array & ner,sparsematrix & s,const xparams _xparams)4200 void sparsecreatecrs(const ae_int_t m, const ae_int_t n, const integer_1d_array &ner, sparsematrix &s, const xparams _xparams)
4201 {
4202     jmp_buf _break_jump;
4203     alglib_impl::ae_state _alglib_env_state;
4204     alglib_impl::ae_state_init(&_alglib_env_state);
4205     if( setjmp(_break_jump) )
4206     {
4207 #if !defined(AE_NO_EXCEPTIONS)
4208         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4209 #else
4210         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4211         return;
4212 #endif
4213     }
4214     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4215     if( _xparams.flags!=0x0 )
4216         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4217     alglib_impl::sparsecreatecrs(m, n, const_cast<alglib_impl::ae_vector*>(ner.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4218     alglib_impl::ae_state_clear(&_alglib_env_state);
4219     return;
4220 }
4221 
4222 /*************************************************************************
4223 This function creates sparse matrix in a CRS format (expert function  for
4224 situations when you are running out  of  memory).  This  version  of  CRS
4225 matrix creation function may reuse memory already allocated in S.
4226 
4227 This function creates CRS matrix. Typical usage scenario for a CRS matrix
4228 is:
4229 1. creation (you have to tell number of non-zero elements at each row  at
4230    this moment)
4231 2. insertion of the matrix elements (row by row, from left to right)
4232 3. matrix is passed to some linear algebra algorithm
4233 
4234 This function is a memory-efficient alternative to SparseCreate(), but it
4235 is more complex because it requires you to know in advance how large your
4236 matrix is. Some  information about  different matrix formats can be found
4237 in comments on SparseMatrix structure.  We recommend  you  to  read  them
4238 before starting to use ALGLIB sparse matrices..
4239 
4240 INPUT PARAMETERS
4241     M           -   number of rows in a matrix, M>=1
4242     N           -   number of columns in a matrix, N>=1
4243     NER         -   number of elements at each row, array[M], NER[I]>=0
4244     S           -   sparse matrix structure with possibly preallocated
4245                     memory.
4246 
4247 OUTPUT PARAMETERS
4248     S           -   sparse M*N matrix in CRS representation.
4249                     You have to fill ALL non-zero elements by calling
4250                     SparseSet() BEFORE you try to use this matrix.
4251 
4252   -- ALGLIB PROJECT --
4253      Copyright 14.10.2011 by Bochkanov Sergey
4254 *************************************************************************/
sparsecreatecrsbuf(const ae_int_t m,const ae_int_t n,const integer_1d_array & ner,const sparsematrix & s,const xparams _xparams)4255 void sparsecreatecrsbuf(const ae_int_t m, const ae_int_t n, const integer_1d_array &ner, const sparsematrix &s, const xparams _xparams)
4256 {
4257     jmp_buf _break_jump;
4258     alglib_impl::ae_state _alglib_env_state;
4259     alglib_impl::ae_state_init(&_alglib_env_state);
4260     if( setjmp(_break_jump) )
4261     {
4262 #if !defined(AE_NO_EXCEPTIONS)
4263         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4264 #else
4265         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4266         return;
4267 #endif
4268     }
4269     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4270     if( _xparams.flags!=0x0 )
4271         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4272     alglib_impl::sparsecreatecrsbuf(m, n, const_cast<alglib_impl::ae_vector*>(ner.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4273     alglib_impl::ae_state_clear(&_alglib_env_state);
4274     return;
4275 }
4276 
4277 /*************************************************************************
4278 This function creates sparse matrix in  a  SKS  format  (skyline  storage
4279 format). In most cases you do not need this function - CRS format  better
4280 suits most use cases.
4281 
4282 INPUT PARAMETERS
4283     M, N        -   number of rows(M) and columns (N) in a matrix:
4284                     * M=N (as for now, ALGLIB supports only square SKS)
4285                     * N>=1
4286                     * M>=1
4287     D           -   "bottom" bandwidths, array[M], D[I]>=0.
4288                     I-th element stores number of non-zeros at I-th  row,
4289                     below the diagonal (diagonal itself is not  included)
4290     U           -   "top" bandwidths, array[N], U[I]>=0.
4291                     I-th element stores number of non-zeros  at I-th row,
4292                     above the diagonal (diagonal itself  is not included)
4293 
4294 OUTPUT PARAMETERS
4295     S           -   sparse M*N matrix in SKS representation.
4296                     All elements are filled by zeros.
4297                     You may use sparseset() to change their values.
4298 
4299 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
4300       Previously allocated storage is NOT reused. If you  want  to  reuse
4301       already allocated memory, call SparseCreateSKSBuf function.
4302 
4303   -- ALGLIB PROJECT --
4304      Copyright 13.01.2014 by Bochkanov Sergey
4305 *************************************************************************/
sparsecreatesks(const ae_int_t m,const ae_int_t n,const integer_1d_array & d,const integer_1d_array & u,sparsematrix & s,const xparams _xparams)4306 void sparsecreatesks(const ae_int_t m, const ae_int_t n, const integer_1d_array &d, const integer_1d_array &u, sparsematrix &s, const xparams _xparams)
4307 {
4308     jmp_buf _break_jump;
4309     alglib_impl::ae_state _alglib_env_state;
4310     alglib_impl::ae_state_init(&_alglib_env_state);
4311     if( setjmp(_break_jump) )
4312     {
4313 #if !defined(AE_NO_EXCEPTIONS)
4314         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4315 #else
4316         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4317         return;
4318 #endif
4319     }
4320     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4321     if( _xparams.flags!=0x0 )
4322         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4323     alglib_impl::sparsecreatesks(m, n, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4324     alglib_impl::ae_state_clear(&_alglib_env_state);
4325     return;
4326 }
4327 
4328 /*************************************************************************
4329 This is "buffered"  version  of  SparseCreateSKS()  which  reuses  memory
4330 previously allocated in S (of course, memory is reallocated if needed).
4331 
4332 This function creates sparse matrix in  a  SKS  format  (skyline  storage
4333 format). In most cases you do not need this function - CRS format  better
4334 suits most use cases.
4335 
4336 INPUT PARAMETERS
4337     M, N        -   number of rows(M) and columns (N) in a matrix:
4338                     * M=N (as for now, ALGLIB supports only square SKS)
4339                     * N>=1
4340                     * M>=1
4341     D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
4342                     I-th element stores number of non-zeros at I-th row,
4343                     below the diagonal (diagonal itself is not included)
4344     U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
4345                     I-th element stores number of non-zeros at I-th row,
4346                     above the diagonal (diagonal itself is not included)
4347 
4348 OUTPUT PARAMETERS
4349     S           -   sparse M*N matrix in SKS representation.
4350                     All elements are filled by zeros.
4351                     You may use sparseset() to change their values.
4352 
4353   -- ALGLIB PROJECT --
4354      Copyright 13.01.2014 by Bochkanov Sergey
4355 *************************************************************************/
sparsecreatesksbuf(const ae_int_t m,const ae_int_t n,const integer_1d_array & d,const integer_1d_array & u,const sparsematrix & s,const xparams _xparams)4356 void sparsecreatesksbuf(const ae_int_t m, const ae_int_t n, const integer_1d_array &d, const integer_1d_array &u, const sparsematrix &s, const xparams _xparams)
4357 {
4358     jmp_buf _break_jump;
4359     alglib_impl::ae_state _alglib_env_state;
4360     alglib_impl::ae_state_init(&_alglib_env_state);
4361     if( setjmp(_break_jump) )
4362     {
4363 #if !defined(AE_NO_EXCEPTIONS)
4364         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4365 #else
4366         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4367         return;
4368 #endif
4369     }
4370     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4371     if( _xparams.flags!=0x0 )
4372         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4373     alglib_impl::sparsecreatesksbuf(m, n, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4374     alglib_impl::ae_state_clear(&_alglib_env_state);
4375     return;
4376 }
4377 
4378 /*************************************************************************
4379 This function creates sparse matrix in  a  SKS  format  (skyline  storage
4380 format). Unlike more general  sparsecreatesks(),  this  function  creates
4381 sparse matrix with constant bandwidth.
4382 
4383 You may want to use this function instead of sparsecreatesks() when  your
4384 matrix has  constant  or  nearly-constant  bandwidth,  and  you  want  to
4385 simplify source code.
4386 
4387 INPUT PARAMETERS
4388     M, N        -   number of rows(M) and columns (N) in a matrix:
4389                     * M=N (as for now, ALGLIB supports only square SKS)
4390                     * N>=1
4391                     * M>=1
4392     BW          -   matrix bandwidth, BW>=0
4393 
4394 OUTPUT PARAMETERS
4395     S           -   sparse M*N matrix in SKS representation.
4396                     All elements are filled by zeros.
4397                     You may use sparseset() to  change  their values.
4398 
4399 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
4400       Previously allocated storage is NOT reused. If you  want  to  reuse
4401       already allocated memory, call sparsecreatesksbandbuf function.
4402 
4403   -- ALGLIB PROJECT --
4404      Copyright 25.12.2017 by Bochkanov Sergey
4405 *************************************************************************/
sparsecreatesksband(const ae_int_t m,const ae_int_t n,const ae_int_t bw,sparsematrix & s,const xparams _xparams)4406 void sparsecreatesksband(const ae_int_t m, const ae_int_t n, const ae_int_t bw, sparsematrix &s, const xparams _xparams)
4407 {
4408     jmp_buf _break_jump;
4409     alglib_impl::ae_state _alglib_env_state;
4410     alglib_impl::ae_state_init(&_alglib_env_state);
4411     if( setjmp(_break_jump) )
4412     {
4413 #if !defined(AE_NO_EXCEPTIONS)
4414         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4415 #else
4416         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4417         return;
4418 #endif
4419     }
4420     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4421     if( _xparams.flags!=0x0 )
4422         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4423     alglib_impl::sparsecreatesksband(m, n, bw, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4424     alglib_impl::ae_state_clear(&_alglib_env_state);
4425     return;
4426 }
4427 
4428 /*************************************************************************
4429 This is "buffered" version  of  sparsecreatesksband() which reuses memory
4430 previously allocated in S (of course, memory is reallocated if needed).
4431 
4432 You may want to use this function instead  of  sparsecreatesksbuf()  when
4433 your matrix has  constant or nearly-constant  bandwidth,  and you want to
4434 simplify source code.
4435 
4436 INPUT PARAMETERS
4437     M, N        -   number of rows(M) and columns (N) in a matrix:
4438                     * M=N (as for now, ALGLIB supports only square SKS)
4439                     * N>=1
4440                     * M>=1
4441     BW          -   bandwidth, BW>=0
4442 
4443 OUTPUT PARAMETERS
4444     S           -   sparse M*N matrix in SKS representation.
4445                     All elements are filled by zeros.
4446                     You may use sparseset() to change their values.
4447 
4448   -- ALGLIB PROJECT --
4449      Copyright 13.01.2014 by Bochkanov Sergey
4450 *************************************************************************/
sparsecreatesksbandbuf(const ae_int_t m,const ae_int_t n,const ae_int_t bw,const sparsematrix & s,const xparams _xparams)4451 void sparsecreatesksbandbuf(const ae_int_t m, const ae_int_t n, const ae_int_t bw, const sparsematrix &s, const xparams _xparams)
4452 {
4453     jmp_buf _break_jump;
4454     alglib_impl::ae_state _alglib_env_state;
4455     alglib_impl::ae_state_init(&_alglib_env_state);
4456     if( setjmp(_break_jump) )
4457     {
4458 #if !defined(AE_NO_EXCEPTIONS)
4459         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4460 #else
4461         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4462         return;
4463 #endif
4464     }
4465     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4466     if( _xparams.flags!=0x0 )
4467         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4468     alglib_impl::sparsecreatesksbandbuf(m, n, bw, const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
4469     alglib_impl::ae_state_clear(&_alglib_env_state);
4470     return;
4471 }
4472 
4473 /*************************************************************************
4474 This function copies S0 to S1.
4475 This function completely deallocates memory owned by S1 before creating a
4476 copy of S0. If you want to reuse memory, use SparseCopyBuf.
4477 
4478 NOTE:  this  function  does  not verify its arguments, it just copies all
4479 fields of the structure.
4480 
4481   -- ALGLIB PROJECT --
4482      Copyright 14.10.2011 by Bochkanov Sergey
4483 *************************************************************************/
sparsecopy(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)4484 void sparsecopy(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
4485 {
4486     jmp_buf _break_jump;
4487     alglib_impl::ae_state _alglib_env_state;
4488     alglib_impl::ae_state_init(&_alglib_env_state);
4489     if( setjmp(_break_jump) )
4490     {
4491 #if !defined(AE_NO_EXCEPTIONS)
4492         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4493 #else
4494         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4495         return;
4496 #endif
4497     }
4498     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4499     if( _xparams.flags!=0x0 )
4500         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4501     alglib_impl::sparsecopy(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
4502     alglib_impl::ae_state_clear(&_alglib_env_state);
4503     return;
4504 }
4505 
4506 /*************************************************************************
4507 This function copies S0 to S1.
4508 Memory already allocated in S1 is reused as much as possible.
4509 
4510 NOTE:  this  function  does  not verify its arguments, it just copies all
4511 fields of the structure.
4512 
4513   -- ALGLIB PROJECT --
4514      Copyright 14.10.2011 by Bochkanov Sergey
4515 *************************************************************************/
sparsecopybuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)4516 void sparsecopybuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
4517 {
4518     jmp_buf _break_jump;
4519     alglib_impl::ae_state _alglib_env_state;
4520     alglib_impl::ae_state_init(&_alglib_env_state);
4521     if( setjmp(_break_jump) )
4522     {
4523 #if !defined(AE_NO_EXCEPTIONS)
4524         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4525 #else
4526         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4527         return;
4528 #endif
4529     }
4530     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4531     if( _xparams.flags!=0x0 )
4532         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4533     alglib_impl::sparsecopybuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
4534     alglib_impl::ae_state_clear(&_alglib_env_state);
4535     return;
4536 }
4537 
4538 /*************************************************************************
4539 This function efficiently swaps contents of S0 and S1.
4540 
4541   -- ALGLIB PROJECT --
4542      Copyright 16.01.2014 by Bochkanov Sergey
4543 *************************************************************************/
sparseswap(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)4544 void sparseswap(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
4545 {
4546     jmp_buf _break_jump;
4547     alglib_impl::ae_state _alglib_env_state;
4548     alglib_impl::ae_state_init(&_alglib_env_state);
4549     if( setjmp(_break_jump) )
4550     {
4551 #if !defined(AE_NO_EXCEPTIONS)
4552         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4553 #else
4554         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4555         return;
4556 #endif
4557     }
4558     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4559     if( _xparams.flags!=0x0 )
4560         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4561     alglib_impl::sparseswap(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
4562     alglib_impl::ae_state_clear(&_alglib_env_state);
4563     return;
4564 }
4565 
4566 /*************************************************************************
4567 This function adds value to S[i,j] - element of the sparse matrix. Matrix
4568 must be in a Hash-Table mode.
4569 
In case S[i,j] already exists in the table, V is added to its  value.  In
4571 case  S[i,j]  is  non-existent,  it  is  inserted  in  the  table.  Table
4572 automatically grows when necessary.
4573 
4574 INPUT PARAMETERS
4575     S           -   sparse M*N matrix in Hash-Table representation.
4576                     Exception will be thrown for CRS matrix.
4577     I           -   row index of the element to modify, 0<=I<M
4578     J           -   column index of the element to modify, 0<=J<N
4579     V           -   value to add, must be finite number
4580 
4581 OUTPUT PARAMETERS
4582     S           -   modified matrix
4583 
4584 NOTE 1:  when  S[i,j]  is exactly zero after modification, it is  deleted
4585 from the table.
4586 
4587   -- ALGLIB PROJECT --
4588      Copyright 14.10.2011 by Bochkanov Sergey
4589 *************************************************************************/
sparseadd(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const double v,const xparams _xparams)4590 void sparseadd(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const double v, const xparams _xparams)
4591 {
4592     jmp_buf _break_jump;
4593     alglib_impl::ae_state _alglib_env_state;
4594     alglib_impl::ae_state_init(&_alglib_env_state);
4595     if( setjmp(_break_jump) )
4596     {
4597 #if !defined(AE_NO_EXCEPTIONS)
4598         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4599 #else
4600         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4601         return;
4602 #endif
4603     }
4604     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4605     if( _xparams.flags!=0x0 )
4606         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4607     alglib_impl::sparseadd(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, v, &_alglib_env_state);
4608     alglib_impl::ae_state_clear(&_alglib_env_state);
4609     return;
4610 }
4611 
4612 /*************************************************************************
4613 This function modifies S[i,j] - element of the sparse matrix.
4614 
4615 For Hash-based storage format:
4616 * this function can be called at any moment - during matrix initialization
4617   or later
4618 * new value can be zero or non-zero.  In case new value of S[i,j] is zero,
4619   this element is deleted from the table.
4620 * this  function  has  no  effect when called with zero V for non-existent
4621   element.
4622 
4623 For CRS-bases storage format:
4624 * this function can be called ONLY DURING MATRIX INITIALIZATION
4625 * zero values are stored in the matrix similarly to non-zero ones
4626 * elements must be initialized in correct order -  from top row to bottom,
4627   within row - from left to right.
4628 
4629 For SKS storage:
4630 * this function can be called at any moment - during matrix initialization
4631   or later
4632 * zero values are stored in the matrix similarly to non-zero ones
4633 * this function CAN NOT be called for non-existent (outside  of  the  band
4634   specified during SKS matrix creation) elements. Say, if you created  SKS
4635   matrix  with  bandwidth=2  and  tried to call sparseset(s,0,10,VAL),  an
4636   exception will be generated.
4637 
4638 INPUT PARAMETERS
4639     S           -   sparse M*N matrix in Hash-Table, SKS or CRS format.
4640     I           -   row index of the element to modify, 0<=I<M
4641     J           -   column index of the element to modify, 0<=J<N
4642     V           -   value to set, must be finite number, can be zero
4643 
4644 OUTPUT PARAMETERS
4645     S           -   modified matrix
4646 
4647   -- ALGLIB PROJECT --
4648      Copyright 14.10.2011 by Bochkanov Sergey
4649 *************************************************************************/
sparseset(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const double v,const xparams _xparams)4650 void sparseset(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const double v, const xparams _xparams)
4651 {
4652     jmp_buf _break_jump;
4653     alglib_impl::ae_state _alglib_env_state;
4654     alglib_impl::ae_state_init(&_alglib_env_state);
4655     if( setjmp(_break_jump) )
4656     {
4657 #if !defined(AE_NO_EXCEPTIONS)
4658         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4659 #else
4660         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4661         return;
4662 #endif
4663     }
4664     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4665     if( _xparams.flags!=0x0 )
4666         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4667     alglib_impl::sparseset(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, v, &_alglib_env_state);
4668     alglib_impl::ae_state_clear(&_alglib_env_state);
4669     return;
4670 }
4671 
4672 /*************************************************************************
4673 This function returns S[i,j] - element of the sparse matrix.  Matrix  can
4674 be in any mode (Hash-Table, CRS, SKS), but this function is less efficient
4675 for CRS matrices. Hash-Table and SKS matrices can find  element  in  O(1)
4676 time, while  CRS  matrices need O(log(RS)) time, where RS is an number of
4677 non-zero elements in a row.
4678 
4679 INPUT PARAMETERS
4680     S           -   sparse M*N matrix
4681     I           -   row index of the element to modify, 0<=I<M
4682     J           -   column index of the element to modify, 0<=J<N
4683 
4684 RESULT
4685     value of S[I,J] or zero (in case no element with such index is found)
4686 
4687   -- ALGLIB PROJECT --
4688      Copyright 14.10.2011 by Bochkanov Sergey
4689 *************************************************************************/
sparseget(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const xparams _xparams)4690 double sparseget(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const xparams _xparams)
4691 {
4692     jmp_buf _break_jump;
4693     alglib_impl::ae_state _alglib_env_state;
4694     alglib_impl::ae_state_init(&_alglib_env_state);
4695     if( setjmp(_break_jump) )
4696     {
4697 #if !defined(AE_NO_EXCEPTIONS)
4698         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4699 #else
4700         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4701         return 0;
4702 #endif
4703     }
4704     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4705     if( _xparams.flags!=0x0 )
4706         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4707     double result = alglib_impl::sparseget(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, &_alglib_env_state);
4708     alglib_impl::ae_state_clear(&_alglib_env_state);
4709     return *(reinterpret_cast<double*>(&result));
4710 }
4711 
4712 /*************************************************************************
4713 This function checks whether S[i,j] is present in the sparse  matrix.  It
4714 returns True even for elements  that  are  numerically  zero  (but  still
4715 have place allocated for them).
4716 
4717 The matrix  can be in any mode (Hash-Table, CRS, SKS), but this  function
4718 is less efficient for CRS matrices. Hash-Table and SKS matrices can  find
4719 element in O(1) time, while  CRS  matrices need O(log(RS)) time, where RS
4720 is an number of non-zero elements in a row.
4721 
4722 INPUT PARAMETERS
4723     S           -   sparse M*N matrix
4724     I           -   row index of the element to modify, 0<=I<M
4725     J           -   column index of the element to modify, 0<=J<N
4726 
4727 RESULT
4728     whether S[I,J] is present in the data structure or not
4729 
4730   -- ALGLIB PROJECT --
4731      Copyright 14.10.2020 by Bochkanov Sergey
4732 *************************************************************************/
sparseexists(const sparsematrix & s,const ae_int_t i,const ae_int_t j,const xparams _xparams)4733 bool sparseexists(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const xparams _xparams)
4734 {
4735     jmp_buf _break_jump;
4736     alglib_impl::ae_state _alglib_env_state;
4737     alglib_impl::ae_state_init(&_alglib_env_state);
4738     if( setjmp(_break_jump) )
4739     {
4740 #if !defined(AE_NO_EXCEPTIONS)
4741         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4742 #else
4743         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4744         return 0;
4745 #endif
4746     }
4747     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4748     if( _xparams.flags!=0x0 )
4749         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4750     ae_bool result = alglib_impl::sparseexists(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, &_alglib_env_state);
4751     alglib_impl::ae_state_clear(&_alglib_env_state);
4752     return *(reinterpret_cast<bool*>(&result));
4753 }
4754 
4755 /*************************************************************************
4756 This function returns I-th diagonal element of the sparse matrix.
4757 
4758 Matrix can be in any mode (Hash-Table or CRS storage), but this  function
4759 is most efficient for CRS matrices - it requires less than 50 CPU  cycles
4760 to extract diagonal element. For Hash-Table matrices we still  have  O(1)
4761 query time, but function is many times slower.
4762 
4763 INPUT PARAMETERS
4764     S           -   sparse M*N matrix in Hash-Table representation.
4765                     Exception will be thrown for CRS matrix.
4766     I           -   index of the element to modify, 0<=I<min(M,N)
4767 
4768 RESULT
4769     value of S[I,I] or zero (in case no element with such index is found)
4770 
4771   -- ALGLIB PROJECT --
4772      Copyright 14.10.2011 by Bochkanov Sergey
4773 *************************************************************************/
sparsegetdiagonal(const sparsematrix & s,const ae_int_t i,const xparams _xparams)4774 double sparsegetdiagonal(const sparsematrix &s, const ae_int_t i, const xparams _xparams)
4775 {
4776     jmp_buf _break_jump;
4777     alglib_impl::ae_state _alglib_env_state;
4778     alglib_impl::ae_state_init(&_alglib_env_state);
4779     if( setjmp(_break_jump) )
4780     {
4781 #if !defined(AE_NO_EXCEPTIONS)
4782         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4783 #else
4784         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4785         return 0;
4786 #endif
4787     }
4788     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4789     if( _xparams.flags!=0x0 )
4790         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4791     double result = alglib_impl::sparsegetdiagonal(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, &_alglib_env_state);
4792     alglib_impl::ae_state_clear(&_alglib_env_state);
4793     return *(reinterpret_cast<double*>(&result));
4794 }
4795 
4796 /*************************************************************************
4797 This function calculates matrix-vector product  S*x.  Matrix  S  must  be
4798 stored in CRS or SKS format (exception will be thrown otherwise).
4799 
4800 INPUT PARAMETERS
4801     S           -   sparse M*N matrix in CRS or SKS format.
4802     X           -   array[N], input vector. For  performance  reasons  we
4803                     make only quick checks - we check that array size  is
4804                     at least N, but we do not check for NAN's or INF's.
4805     Y           -   output buffer, possibly preallocated. In case  buffer
4806                     size is too small to store  result,  this  buffer  is
4807                     automatically resized.
4808 
4809 OUTPUT PARAMETERS
4810     Y           -   array[M], S*x
4811 
4812 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4813 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4814 this function.
4815 
4816   -- ALGLIB PROJECT --
4817      Copyright 14.10.2011 by Bochkanov Sergey
4818 *************************************************************************/
sparsemv(const sparsematrix & s,const real_1d_array & x,real_1d_array & y,const xparams _xparams)4819 void sparsemv(const sparsematrix &s, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
4820 {
4821     jmp_buf _break_jump;
4822     alglib_impl::ae_state _alglib_env_state;
4823     alglib_impl::ae_state_init(&_alglib_env_state);
4824     if( setjmp(_break_jump) )
4825     {
4826 #if !defined(AE_NO_EXCEPTIONS)
4827         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4828 #else
4829         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4830         return;
4831 #endif
4832     }
4833     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4834     if( _xparams.flags!=0x0 )
4835         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4836     alglib_impl::sparsemv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
4837     alglib_impl::ae_state_clear(&_alglib_env_state);
4838     return;
4839 }
4840 
4841 /*************************************************************************
4842 This function calculates matrix-vector product  S^T*x. Matrix S  must  be
4843 stored in CRS or SKS format (exception will be thrown otherwise).
4844 
4845 INPUT PARAMETERS
4846     S           -   sparse M*N matrix in CRS or SKS format.
4847     X           -   array[M], input vector. For  performance  reasons  we
4848                     make only quick checks - we check that array size  is
4849                     at least M, but we do not check for NAN's or INF's.
4850     Y           -   output buffer, possibly preallocated. In case  buffer
4851                     size is too small to store  result,  this  buffer  is
4852                     automatically resized.
4853 
4854 OUTPUT PARAMETERS
4855     Y           -   array[N], S^T*x
4856 
4857 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4858 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4859 this function.
4860 
4861   -- ALGLIB PROJECT --
4862      Copyright 14.10.2011 by Bochkanov Sergey
4863 *************************************************************************/
sparsemtv(const sparsematrix & s,const real_1d_array & x,real_1d_array & y,const xparams _xparams)4864 void sparsemtv(const sparsematrix &s, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
4865 {
4866     jmp_buf _break_jump;
4867     alglib_impl::ae_state _alglib_env_state;
4868     alglib_impl::ae_state_init(&_alglib_env_state);
4869     if( setjmp(_break_jump) )
4870     {
4871 #if !defined(AE_NO_EXCEPTIONS)
4872         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4873 #else
4874         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4875         return;
4876 #endif
4877     }
4878     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4879     if( _xparams.flags!=0x0 )
4880         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4881     alglib_impl::sparsemtv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
4882     alglib_impl::ae_state_clear(&_alglib_env_state);
4883     return;
4884 }
4885 
4886 /*************************************************************************
4887 This function calculates generalized sparse matrix-vector product
4888 
4889     y := alpha*op(S)*x + beta*y
4890 
4891 Matrix S must be stored in CRS or SKS format (exception  will  be  thrown
4892 otherwise). op(S) can be either S or S^T.
4893 
4894 NOTE: this  function  expects  Y  to  be  large enough to store result. No
4895       automatic preallocation happens for smaller arrays.
4896 
4897 INPUT PARAMETERS
4898     S           -   sparse matrix in CRS or SKS format.
4899     Alpha       -   source coefficient
4900     OpS         -   operation type:
4901                     * OpS=0     =>  op(S) = S
4902                     * OpS=1     =>  op(S) = S^T
4903     X           -   input vector, must have at least Cols(op(S))+IX elements
4904     IX          -   subvector offset
4905     Beta        -   destination coefficient
4906     Y           -   preallocated output array, must have at least Rows(op(S))+IY elements
4907     IY          -   subvector offset
4908 
4909 OUTPUT PARAMETERS
4910     Y           -   elements [IY...IY+Rows(op(S))-1] are replaced by result,
4911                     other elements are not modified
4912 
4913 HANDLING OF SPECIAL CASES:
4914 * below M=Rows(op(S)) and N=Cols(op(S)). Although current  ALGLIB  version
4915   does not allow you to  create  zero-sized  sparse  matrices,  internally
4916   ALGLIB  can  deal  with  such matrices. So, comments for M or N equal to
4917   zero are for internal use only.
4918 * if M=0, then subroutine does nothing. It does not even touch arrays.
4919 * if N=0 or Alpha=0.0, then:
4920   * if Beta=0, then Y is filled by zeros. S and X are  not  referenced  at
4921     all. Initial values of Y are ignored (we do not  multiply  Y by  zero,
4922     we just rewrite it by zeros)
4923   * if Beta<>0, then Y is replaced by Beta*Y
4924 * if M>0, N>0, Alpha<>0, but  Beta=0, then  Y is replaced by alpha*op(S)*x
4925   initial state of Y  is ignored (rewritten without initial multiplication
4926   by zeros).
4927 
4928 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4929 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4930 this function.
4931 
4932   -- ALGLIB PROJECT --
4933      Copyright 10.12.2019 by Bochkanov Sergey
4934 *************************************************************************/
sparsegemv(const sparsematrix & s,const double alpha,const ae_int_t ops,const real_1d_array & x,const ae_int_t ix,const double beta,const real_1d_array & y,const ae_int_t iy,const xparams _xparams)4935 void sparsegemv(const sparsematrix &s, const double alpha, const ae_int_t ops, const real_1d_array &x, const ae_int_t ix, const double beta, const real_1d_array &y, const ae_int_t iy, const xparams _xparams)
4936 {
4937     jmp_buf _break_jump;
4938     alglib_impl::ae_state _alglib_env_state;
4939     alglib_impl::ae_state_init(&_alglib_env_state);
4940     if( setjmp(_break_jump) )
4941     {
4942 #if !defined(AE_NO_EXCEPTIONS)
4943         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4944 #else
4945         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4946         return;
4947 #endif
4948     }
4949     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4950     if( _xparams.flags!=0x0 )
4951         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4952     alglib_impl::sparsegemv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), alpha, ops, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), ix, beta, const_cast<alglib_impl::ae_vector*>(y.c_ptr()), iy, &_alglib_env_state);
4953     alglib_impl::ae_state_clear(&_alglib_env_state);
4954     return;
4955 }
4956 
4957 /*************************************************************************
4958 This function simultaneously calculates two matrix-vector products:
4959     S*x and S^T*x.
4960 S must be square (non-rectangular) matrix stored in  CRS  or  SKS  format
4961 (exception will be thrown otherwise).
4962 
4963 INPUT PARAMETERS
4964     S           -   sparse N*N matrix in CRS or SKS format.
4965     X           -   array[N], input vector. For  performance  reasons  we
4966                     make only quick checks - we check that array size  is
4967                     at least N, but we do not check for NAN's or INF's.
4968     Y0          -   output buffer, possibly preallocated. In case  buffer
4969                     size is too small to store  result,  this  buffer  is
4970                     automatically resized.
4971     Y1          -   output buffer, possibly preallocated. In case  buffer
4972                     size is too small to store  result,  this  buffer  is
4973                     automatically resized.
4974 
4975 OUTPUT PARAMETERS
4976     Y0          -   array[N], S*x
4977     Y1          -   array[N], S^T*x
4978 
4979 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
4980 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
4981 this function.
4982 
4983   -- ALGLIB PROJECT --
4984      Copyright 14.10.2011 by Bochkanov Sergey
4985 *************************************************************************/
sparsemv2(const sparsematrix & s,const real_1d_array & x,real_1d_array & y0,real_1d_array & y1,const xparams _xparams)4986 void sparsemv2(const sparsematrix &s, const real_1d_array &x, real_1d_array &y0, real_1d_array &y1, const xparams _xparams)
4987 {
4988     jmp_buf _break_jump;
4989     alglib_impl::ae_state _alglib_env_state;
4990     alglib_impl::ae_state_init(&_alglib_env_state);
4991     if( setjmp(_break_jump) )
4992     {
4993 #if !defined(AE_NO_EXCEPTIONS)
4994         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4995 #else
4996         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4997         return;
4998 #endif
4999     }
5000     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5001     if( _xparams.flags!=0x0 )
5002         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5003     alglib_impl::sparsemv2(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y0.c_ptr()), const_cast<alglib_impl::ae_vector*>(y1.c_ptr()), &_alglib_env_state);
5004     alglib_impl::ae_state_clear(&_alglib_env_state);
5005     return;
5006 }
5007 
5008 /*************************************************************************
5009 This function calculates matrix-vector product  S*x, when S is  symmetric
5010 matrix. Matrix S  must be stored in CRS or SKS format  (exception will be
5011 thrown otherwise).
5012 
5013 INPUT PARAMETERS
5014     S           -   sparse M*M matrix in CRS or SKS format.
5015     IsUpper     -   whether upper or lower triangle of S is given:
5016                     * if upper triangle is given,  only   S[i,j] for j>=i
5017                       are used, and lower triangle is ignored (it can  be
5018                       empty - these elements are not referenced at all).
5019                     * if lower triangle is given,  only   S[i,j] for j<=i
5020                       are used, and upper triangle is ignored.
5021     X           -   array[N], input vector. For  performance  reasons  we
5022                     make only quick checks - we check that array size  is
5023                     at least N, but we do not check for NAN's or INF's.
5024     Y           -   output buffer, possibly preallocated. In case  buffer
5025                     size is too small to store  result,  this  buffer  is
5026                     automatically resized.
5027 
5028 OUTPUT PARAMETERS
5029     Y           -   array[M], S*x
5030 
5031 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5032 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5033 this function.
5034 
5035   -- ALGLIB PROJECT --
5036      Copyright 14.10.2011 by Bochkanov Sergey
5037 *************************************************************************/
sparsesmv(const sparsematrix & s,const bool isupper,const real_1d_array & x,real_1d_array & y,const xparams _xparams)5038 void sparsesmv(const sparsematrix &s, const bool isupper, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
5039 {
5040     jmp_buf _break_jump;
5041     alglib_impl::ae_state _alglib_env_state;
5042     alglib_impl::ae_state_init(&_alglib_env_state);
5043     if( setjmp(_break_jump) )
5044     {
5045 #if !defined(AE_NO_EXCEPTIONS)
5046         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5047 #else
5048         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5049         return;
5050 #endif
5051     }
5052     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5053     if( _xparams.flags!=0x0 )
5054         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5055     alglib_impl::sparsesmv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
5056     alglib_impl::ae_state_clear(&_alglib_env_state);
5057     return;
5058 }
5059 
5060 /*************************************************************************
5061 This function calculates vector-matrix-vector product x'*S*x, where  S is
5062 symmetric matrix. Matrix S must be stored in CRS or SKS format (exception
5063 will be thrown otherwise).
5064 
5065 INPUT PARAMETERS
5066     S           -   sparse M*M matrix in CRS or SKS format.
5067     IsUpper     -   whether upper or lower triangle of S is given:
5068                     * if upper triangle is given,  only   S[i,j] for j>=i
5069                       are used, and lower triangle is ignored (it can  be
5070                       empty - these elements are not referenced at all).
5071                     * if lower triangle is given,  only   S[i,j] for j<=i
5072                       are used, and upper triangle is ignored.
5073     X           -   array[N], input vector. For  performance  reasons  we
5074                     make only quick checks - we check that array size  is
5075                     at least N, but we do not check for NAN's or INF's.
5076 
5077 RESULT
5078     x'*S*x
5079 
5080 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5081 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5082 this function.
5083 
5084   -- ALGLIB PROJECT --
5085      Copyright 27.01.2014 by Bochkanov Sergey
5086 *************************************************************************/
sparsevsmv(const sparsematrix & s,const bool isupper,const real_1d_array & x,const xparams _xparams)5087 double sparsevsmv(const sparsematrix &s, const bool isupper, const real_1d_array &x, const xparams _xparams)
5088 {
5089     jmp_buf _break_jump;
5090     alglib_impl::ae_state _alglib_env_state;
5091     alglib_impl::ae_state_init(&_alglib_env_state);
5092     if( setjmp(_break_jump) )
5093     {
5094 #if !defined(AE_NO_EXCEPTIONS)
5095         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5096 #else
5097         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5098         return 0;
5099 #endif
5100     }
5101     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5102     if( _xparams.flags!=0x0 )
5103         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5104     double result = alglib_impl::sparsevsmv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
5105     alglib_impl::ae_state_clear(&_alglib_env_state);
5106     return *(reinterpret_cast<double*>(&result));
5107 }
5108 
5109 /*************************************************************************
5110 This function calculates matrix-matrix product  S*A.  Matrix  S  must  be
5111 stored in CRS or SKS format (exception will be thrown otherwise).
5112 
5113 INPUT PARAMETERS
5114     S           -   sparse M*N matrix in CRS or SKS format.
5115     A           -   array[N][K], input dense matrix. For  performance reasons
5116                     we make only quick checks - we check that array size
5117                     is at least N, but we do not check for NAN's or INF's.
5118     K           -   number of columns of matrix (A).
5119     B           -   output buffer, possibly preallocated. In case  buffer
5120                     size is too small to store  result,  this  buffer  is
5121                     automatically resized.
5122 
5123 OUTPUT PARAMETERS
5124     B           -   array[M][K], S*A
5125 
5126 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5127 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5128 this function.
5129 
5130   -- ALGLIB PROJECT --
5131      Copyright 14.10.2011 by Bochkanov Sergey
5132 *************************************************************************/
sparsemm(const sparsematrix & s,const real_2d_array & a,const ae_int_t k,real_2d_array & b,const xparams _xparams)5133 void sparsemm(const sparsematrix &s, const real_2d_array &a, const ae_int_t k, real_2d_array &b, const xparams _xparams)
5134 {
5135     jmp_buf _break_jump;
5136     alglib_impl::ae_state _alglib_env_state;
5137     alglib_impl::ae_state_init(&_alglib_env_state);
5138     if( setjmp(_break_jump) )
5139     {
5140 #if !defined(AE_NO_EXCEPTIONS)
5141         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5142 #else
5143         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5144         return;
5145 #endif
5146     }
5147     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5148     if( _xparams.flags!=0x0 )
5149         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5150     alglib_impl::sparsemm(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), &_alglib_env_state);
5151     alglib_impl::ae_state_clear(&_alglib_env_state);
5152     return;
5153 }
5154 
5155 /*************************************************************************
5156 This function calculates matrix-matrix product  S^T*A. Matrix S  must  be
5157 stored in CRS or SKS format (exception will be thrown otherwise).
5158 
5159 INPUT PARAMETERS
5160     S           -   sparse M*N matrix in CRS or SKS format.
5161     A           -   array[M][K], input dense matrix. For performance reasons
5162                     we make only quick checks - we check that array size  is
5163                     at least M, but we do not check for NAN's or INF's.
5164     K           -   number of columns of matrix (A).
5165     B           -   output buffer, possibly preallocated. In case  buffer
5166                     size is too small to store  result,  this  buffer  is
5167                     automatically resized.
5168 
5169 OUTPUT PARAMETERS
5170     B           -   array[N][K], S^T*A
5171 
5172 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5173 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5174 this function.
5175 
5176   -- ALGLIB PROJECT --
5177      Copyright 14.10.2011 by Bochkanov Sergey
5178 *************************************************************************/
sparsemtm(const sparsematrix & s,const real_2d_array & a,const ae_int_t k,real_2d_array & b,const xparams _xparams)5179 void sparsemtm(const sparsematrix &s, const real_2d_array &a, const ae_int_t k, real_2d_array &b, const xparams _xparams)
5180 {
5181     jmp_buf _break_jump;
5182     alglib_impl::ae_state _alglib_env_state;
5183     alglib_impl::ae_state_init(&_alglib_env_state);
5184     if( setjmp(_break_jump) )
5185     {
5186 #if !defined(AE_NO_EXCEPTIONS)
5187         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5188 #else
5189         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5190         return;
5191 #endif
5192     }
5193     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5194     if( _xparams.flags!=0x0 )
5195         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5196     alglib_impl::sparsemtm(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), &_alglib_env_state);
5197     alglib_impl::ae_state_clear(&_alglib_env_state);
5198     return;
5199 }
5200 
5201 /*************************************************************************
5202 This function simultaneously calculates two matrix-matrix products:
5203     S*A and S^T*A.
5204 S  must  be  square (non-rectangular) matrix stored in CRS or  SKS  format
5205 (exception will be thrown otherwise).
5206 
5207 INPUT PARAMETERS
5208     S           -   sparse N*N matrix in CRS or SKS format.
5209     A           -   array[N][K], input dense matrix. For performance reasons
5210                     we make only quick checks - we check that array size  is
5211                     at least N, but we do not check for NAN's or INF's.
5212     K           -   number of columns of matrix (A).
5213     B0          -   output buffer, possibly preallocated. In case  buffer
5214                     size is too small to store  result,  this  buffer  is
5215                     automatically resized.
5216     B1          -   output buffer, possibly preallocated. In case  buffer
5217                     size is too small to store  result,  this  buffer  is
5218                     automatically resized.
5219 
5220 OUTPUT PARAMETERS
5221     B0          -   array[N][K], S*A
5222     B1          -   array[N][K], S^T*A
5223 
5224 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5225 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5226 this function.
5227 
5228   -- ALGLIB PROJECT --
5229      Copyright 14.10.2011 by Bochkanov Sergey
5230 *************************************************************************/
sparsemm2(const sparsematrix & s,const real_2d_array & a,const ae_int_t k,real_2d_array & b0,real_2d_array & b1,const xparams _xparams)5231 void sparsemm2(const sparsematrix &s, const real_2d_array &a, const ae_int_t k, real_2d_array &b0, real_2d_array &b1, const xparams _xparams)
5232 {
5233     jmp_buf _break_jump;
5234     alglib_impl::ae_state _alglib_env_state;
5235     alglib_impl::ae_state_init(&_alglib_env_state);
5236     if( setjmp(_break_jump) )
5237     {
5238 #if !defined(AE_NO_EXCEPTIONS)
5239         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5240 #else
5241         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5242         return;
5243 #endif
5244     }
5245     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5246     if( _xparams.flags!=0x0 )
5247         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5248     alglib_impl::sparsemm2(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b0.c_ptr()), const_cast<alglib_impl::ae_matrix*>(b1.c_ptr()), &_alglib_env_state);
5249     alglib_impl::ae_state_clear(&_alglib_env_state);
5250     return;
5251 }
5252 
5253 /*************************************************************************
5254 This function calculates matrix-matrix product  S*A, when S  is  symmetric
5255 matrix. Matrix S must be stored in CRS or SKS format  (exception  will  be
5256 thrown otherwise).
5257 
5258 INPUT PARAMETERS
5259     S           -   sparse M*M matrix in CRS or SKS format.
5260     IsUpper     -   whether upper or lower triangle of S is given:
5261                     * if upper triangle is given,  only   S[i,j] for j>=i
5262                       are used, and lower triangle is ignored (it can  be
5263                       empty - these elements are not referenced at all).
5264                     * if lower triangle is given,  only   S[i,j] for j<=i
5265                       are used, and upper triangle is ignored.
5266     A           -   array[N][K], input dense matrix. For performance reasons
5267                     we make only quick checks - we check that array size is
5268                     at least N, but we do not check for NAN's or INF's.
5269     K           -   number of columns of matrix (A).
5270     B           -   output buffer, possibly preallocated. In case  buffer
5271                     size is too small to store  result,  this  buffer  is
5272                     automatically resized.
5273 
5274 OUTPUT PARAMETERS
5275     B           -   array[M][K], S*A
5276 
5277 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5278 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5279 this function.
5280 
5281   -- ALGLIB PROJECT --
5282      Copyright 14.10.2011 by Bochkanov Sergey
5283 *************************************************************************/
sparsesmm(const sparsematrix & s,const bool isupper,const real_2d_array & a,const ae_int_t k,real_2d_array & b,const xparams _xparams)5284 void sparsesmm(const sparsematrix &s, const bool isupper, const real_2d_array &a, const ae_int_t k, real_2d_array &b, const xparams _xparams)
5285 {
5286     jmp_buf _break_jump;
5287     alglib_impl::ae_state _alglib_env_state;
5288     alglib_impl::ae_state_init(&_alglib_env_state);
5289     if( setjmp(_break_jump) )
5290     {
5291 #if !defined(AE_NO_EXCEPTIONS)
5292         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5293 #else
5294         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5295         return;
5296 #endif
5297     }
5298     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5299     if( _xparams.flags!=0x0 )
5300         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5301     alglib_impl::sparsesmm(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), k, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), &_alglib_env_state);
5302     alglib_impl::ae_state_clear(&_alglib_env_state);
5303     return;
5304 }
5305 
5306 /*************************************************************************
5307 This function calculates matrix-vector product op(S)*x, when x is  vector,
5308 S is symmetric triangular matrix, op(S) is transposition or no  operation.
5309 Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
5310 otherwise).
5311 
5312 INPUT PARAMETERS
5313     S           -   sparse square matrix in CRS or SKS format.
5314     IsUpper     -   whether upper or lower triangle of S is used:
5315                     * if upper triangle is given,  only   S[i,j] for  j>=i
5316                       are used, and lower triangle is  ignored (it can  be
5317                       empty - these elements are not referenced at all).
5318                     * if lower triangle is given,  only   S[i,j] for  j<=i
5319                       are used, and upper triangle is ignored.
5320     IsUnit      -   unit or non-unit diagonal:
5321                     * if True, diagonal elements of triangular matrix  are
5322                       considered equal to 1.0. Actual elements  stored  in
5323                       S are not referenced at all.
5324                     * if False, diagonal stored in S is used
5325     OpType      -   operation type:
5326                     * if 0, S*x is calculated
5327                     * if 1, (S^T)*x is calculated (transposition)
5328     X           -   array[N] which stores input  vector.  For  performance
5329                     reasons we make only quick  checks  -  we  check  that
5330                     array  size  is  at  least  N, but we do not check for
5331                     NAN's or INF's.
5332     Y           -   possibly  preallocated  input   buffer.  Automatically
5333                     resized if its size is too small.
5334 
5335 OUTPUT PARAMETERS
5336     Y           -   array[N], op(S)*x
5337 
5338 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
5339 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
5340 this function.
5341 
5342   -- ALGLIB PROJECT --
5343      Copyright 20.01.2014 by Bochkanov Sergey
5344 *************************************************************************/
void sparsetrmv(const sparsematrix &s, const bool isupper, const bool isunit, const ae_int_t optype, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
{
    // Thin C++ shim over the computational core alglib_impl::sparsetrmv()
    // (y := op(S)*x for a triangular sparse S). The C core reports errors
    // via longjmp(); we install a setjmp() recovery point and translate a
    // core error into a C++ exception (or a sticky error flag when the
    // library is built with AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: c_ptr() returns pointers to const; the core takes
    // non-const pointers (ALGLIB generated-wrapper convention).
    alglib_impl::sparsetrmv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, isunit, optype, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5366 
5367 /*************************************************************************
5368 This function solves linear system op(S)*y=x  where  x  is  vector,  S  is
5369 symmetric  triangular  matrix,  op(S)  is  transposition  or no operation.
5370 Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
5371 otherwise).
5372 
5373 INPUT PARAMETERS
5374     S           -   sparse square matrix in CRS or SKS format.
5375     IsUpper     -   whether upper or lower triangle of S is used:
5376                     * if upper triangle is given,  only   S[i,j] for  j>=i
5377                       are used, and lower triangle is  ignored (it can  be
5378                       empty - these elements are not referenced at all).
5379                     * if lower triangle is given,  only   S[i,j] for  j<=i
5380                       are used, and upper triangle is ignored.
5381     IsUnit      -   unit or non-unit diagonal:
5382                     * if True, diagonal elements of triangular matrix  are
5383                       considered equal to 1.0. Actual elements  stored  in
5384                       S are not referenced at all.
5385                     * if False, diagonal stored in S is used. It  is  your
5386                       responsibility  to  make  sure  that   diagonal   is
5387                       non-zero.
5388     OpType      -   operation type:
5389                     * if 0, S*x is calculated
5390                     * if 1, (S^T)*x is calculated (transposition)
5391     X           -   array[N] which stores input  vector.  For  performance
5392                     reasons we make only quick  checks  -  we  check  that
5393                     array  size  is  at  least  N, but we do not check for
5394                     NAN's or INF's.
5395 
5396 OUTPUT PARAMETERS
5397     X           -   array[N], inv(op(S))*x
5398 
5399 NOTE: this function throws exception when called for  non-CRS/SKS  matrix.
5400       You must convert your matrix  with  SparseConvertToCRS/SKS()  before
5401       using this function.
5402 
5403 NOTE: no assertion or tests are done during algorithm  operation.   It  is
5404       your responsibility to provide invertible matrix to algorithm.
5405 
5406   -- ALGLIB PROJECT --
5407      Copyright 20.01.2014 by Bochkanov Sergey
5408 *************************************************************************/
void sparsetrsv(const sparsematrix &s, const bool isupper, const bool isunit, const ae_int_t optype, const real_1d_array &x, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsetrsv() (solves op(S)*y=x in place).
    // NOTE: x is passed as a const reference but IS modified in place (it
    // receives inv(op(S))*x, per the header comment) — the const is cast
    // away below, following ALGLIB's generated-wrapper convention.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Errors in the C core arrive here via longjmp() and are re-raised as
    // a C++ exception (or recorded in a flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsetrsv(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), isupper, isunit, optype, const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5430 
5431 /*************************************************************************
5432 This function applies permutation given by permutation table P (as opposed
5433 to product form of permutation) to sparse symmetric  matrix  A,  given  by
5434 either upper or lower triangle: B := P*A*P'.
5435 
5436 This function allocates completely new instance of B. Use buffered version
5437 SparseSymmPermTblBuf() if you want to reuse already allocated structure.
5438 
5439 INPUT PARAMETERS
5440     A           -   sparse square matrix in CRS format.
5441     IsUpper     -   whether upper or lower triangle of A is used:
5442                     * if upper triangle is given,  only   A[i,j] for  j>=i
5443                       are used, and lower triangle is  ignored (it can  be
5444                       empty - these elements are not referenced at all).
5445                     * if lower triangle is given,  only   A[i,j] for  j<=i
5446                       are used, and upper triangle is ignored.
5447     P           -   array[N] which stores permutation table;  P[I]=J means
5448                     that I-th row/column of matrix  A  is  moved  to  J-th
5449                     position. For performance reasons we do NOT check that
                    position. For performance reasons we do NOT check that
                    P[] is a correct permutation (i.e. that there are  no
                    repetitions); we only check that all its elements are
                    in the [0,N) range.
5453 
5454 OUTPUT PARAMETERS
5455     B           -   permuted matrix.  Permutation  is  applied  to A  from
5456                     the both sides, only upper or lower triangle (depending
5457                     on IsUpper) is stored.
5458 
5459 NOTE: this function throws exception when called for non-CRS  matrix.  You
5460       must convert your matrix with SparseConvertToCRS() before using this
5461       function.
5462 
5463   -- ALGLIB PROJECT --
5464      Copyright 05.10.2020 by Bochkanov Sergey.
5465 *************************************************************************/
void sparsesymmpermtbl(const sparsematrix &a, const bool isupper, const integer_1d_array &p, sparsematrix &b, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsesymmpermtbl() (B := P*A*P' for a
    // symmetric A given by one triangle). Core errors arrive via longjmp()
    // into the setjmp() frame below and are converted to a C++ exception
    // (or an error flag when built with AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: c_ptr() returns pointers to const; the core mutates b.
    alglib_impl::sparsesymmpermtbl(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), const_cast<alglib_impl::sparsematrix*>(b.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5487 
5488 /*************************************************************************
5489 This function is a buffered version  of  SparseSymmPermTbl()  that  reuses
5490 previously allocated storage in B as much as possible.
5491 
5492 This function applies permutation given by permutation table P (as opposed
5493 to product form of permutation) to sparse symmetric  matrix  A,  given  by
5494 either upper or lower triangle: B := P*A*P'.
5495 
5496 INPUT PARAMETERS
5497     A           -   sparse square matrix in CRS format.
5498     IsUpper     -   whether upper or lower triangle of A is used:
5499                     * if upper triangle is given,  only   A[i,j] for  j>=i
5500                       are used, and lower triangle is  ignored (it can  be
5501                       empty - these elements are not referenced at all).
5502                     * if lower triangle is given,  only   A[i,j] for  j<=i
5503                       are used, and upper triangle is ignored.
5504     P           -   array[N] which stores permutation table;  P[I]=J means
5505                     that I-th row/column of matrix  A  is  moved  to  J-th
5506                     position. For performance reasons we do NOT check that
                    position. For performance reasons we do NOT check that
                    P[] is a correct permutation (i.e. that there are  no
                    repetitions); we only check that all its elements are
                    in the [0,N) range.
5510     B           -   sparse matrix object that will hold output.
5511                     Previously allocated memory will be reused as much  as
5512                     possible.
5513 
5514 OUTPUT PARAMETERS
5515     B           -   permuted matrix.  Permutation  is  applied  to A  from
5516                     the both sides, only upper or lower triangle (depending
5517                     on IsUpper) is stored.
5518 
5519 NOTE: this function throws exception when called for non-CRS  matrix.  You
5520       must convert your matrix with SparseConvertToCRS() before using this
5521       function.
5522 
5523   -- ALGLIB PROJECT --
5524      Copyright 05.10.2020 by Bochkanov Sergey.
5525 *************************************************************************/
void sparsesymmpermtblbuf(const sparsematrix &a, const bool isupper, const integer_1d_array &p, const sparsematrix &b, const xparams _xparams)
{
    // Buffered variant of sparsesymmpermtbl(): reuses storage already
    // allocated in b. NOTE: b is taken by const reference but IS the
    // output (const is cast away below) — ALGLIB generated-wrapper
    // convention for buffered outputs.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // longjmp() target for errors raised inside the C core; converted to
    // a C++ exception (or an error flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsesymmpermtblbuf(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), const_cast<alglib_impl::sparsematrix*>(b.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5547 
5548 /*************************************************************************
5549 This procedure resizes Hash-Table matrix. It can be called when you  have
5550 deleted too many elements from the matrix, and you want to  free unneeded
5551 memory.
5552 
5553   -- ALGLIB PROJECT --
5554      Copyright 14.10.2011 by Bochkanov Sergey
5555 *************************************************************************/
sparseresizematrix(const sparsematrix & s,const xparams _xparams)5556 void sparseresizematrix(const sparsematrix &s, const xparams _xparams)
5557 {
5558     jmp_buf _break_jump;
5559     alglib_impl::ae_state _alglib_env_state;
5560     alglib_impl::ae_state_init(&_alglib_env_state);
5561     if( setjmp(_break_jump) )
5562     {
5563 #if !defined(AE_NO_EXCEPTIONS)
5564         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5565 #else
5566         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5567         return;
5568 #endif
5569     }
5570     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5571     if( _xparams.flags!=0x0 )
5572         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5573     alglib_impl::sparseresizematrix(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
5574     alglib_impl::ae_state_clear(&_alglib_env_state);
5575     return;
5576 }
5577 
5578 /*************************************************************************
5579 This  function  is  used  to enumerate all elements of the sparse matrix.
5580 Before  first  call  user  initializes  T0 and T1 counters by zero. These
5581 counters are used to remember current position in a  matrix;  after  each
5582 call they are updated by the function.
5583 
5584 Subsequent calls to this function return non-zero elements of the  sparse
5585 matrix, one by one. If you enumerate CRS matrix, matrix is traversed from
5586 left to right, from top to bottom. In case you enumerate matrix stored as
5587 Hash table, elements are returned in random order.
5588 
5589 EXAMPLE
5590     > T0=0
5591     > T1=0
5592     > while SparseEnumerate(S,T0,T1,I,J,V) do
5593     >     ....do something with I,J,V
5594 
5595 INPUT PARAMETERS
5596     S           -   sparse M*N matrix in Hash-Table or CRS representation.
5597     T0          -   internal counter
5598     T1          -   internal counter
5599 
5600 OUTPUT PARAMETERS
5601     T0          -   new value of the internal counter
5602     T1          -   new value of the internal counter
5603     I           -   row index of non-zero element, 0<=I<M.
5604     J           -   column index of non-zero element, 0<=J<N
5605     V           -   value of the T-th element
5606 
5607 RESULT
5608     True in case of success (next non-zero element was retrieved)
5609     False in case all non-zero elements were enumerated
5610 
5611 NOTE: you may call SparseRewriteExisting() during enumeration, but it  is
5612       THE  ONLY  matrix  modification  function  you  can  call!!!  Other
5613       matrix modification functions should not be called during enumeration!
5614 
5615   -- ALGLIB PROJECT --
5616      Copyright 14.03.2012 by Bochkanov Sergey
5617 *************************************************************************/
bool sparseenumerate(const sparsematrix &s, ae_int_t &t0, ae_int_t &t1, ae_int_t &i, ae_int_t &j, double &v, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparseenumerate(): yields the next
    // non-zero element, advancing the (t0,t1) cursor. Returns false when
    // enumeration is exhausted. Errors in the C core arrive via longjmp()
    // into the setjmp() frame and become a C++ exception (or an error
    // flag plus a 'false'-like 0 return under AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::sparseenumerate(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &t0, &t1, &i, &j, &v, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // Reinterpret the C-core ae_bool as C++ bool (ALGLIB generated-code
    // idiom; assumes compatible representations — TODO(review): confirm).
    return *(reinterpret_cast<bool*>(&result));
}
5639 
5640 /*************************************************************************
5641 This function rewrites existing (non-zero) element. It  returns  True   if
5642 element  exists  or  False,  when  it  is  called for non-existing  (zero)
5643 element.
5644 
5645 This function works with any kind of the matrix.
5646 
5647 The purpose of this function is to provide convenient thread-safe  way  to
5648 modify  sparse  matrix.  Such  modification  (already  existing element is
5649 rewritten) is guaranteed to be thread-safe without any synchronization, as
5650 long as different threads modify different elements.
5651 
5652 INPUT PARAMETERS
5653     S           -   sparse M*N matrix in any kind of representation
5654                     (Hash, SKS, CRS).
5655     I           -   row index of non-zero element to modify, 0<=I<M
5656     J           -   column index of non-zero element to modify, 0<=J<N
5657     V           -   value to rewrite, must be finite number
5658 
5659 OUTPUT PARAMETERS
5660     S           -   modified matrix
5661 RESULT
5662     True in case when element exists
5663     False in case when element doesn't exist or it is zero
5664 
5665   -- ALGLIB PROJECT --
5666      Copyright 14.03.2012 by Bochkanov Sergey
5667 *************************************************************************/
bool sparserewriteexisting(const sparsematrix &s, const ae_int_t i, const ae_int_t j, const double v, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparserewriteexisting(): overwrites an
    // already-present element S[i,j] with v; returns false if the element
    // does not exist. Core errors arrive via longjmp() into the setjmp()
    // frame below and become a C++ exception (or an error flag under
    // AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::sparserewriteexisting(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, j, v, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // Reinterpret the C-core ae_bool as C++ bool (ALGLIB generated-code
    // idiom; assumes compatible representations — TODO(review): confirm).
    return *(reinterpret_cast<bool*>(&result));
}
5689 
5690 /*************************************************************************
5691 This function returns I-th row of the sparse matrix. Matrix must be stored
5692 in CRS or SKS format.
5693 
5694 INPUT PARAMETERS:
5695     S           -   sparse M*N matrix in CRS format
5696     I           -   row index, 0<=I<M
5697     IRow        -   output buffer, can be  preallocated.  In  case  buffer
5698                     size  is  too  small  to  store  I-th   row,   it   is
5699                     automatically reallocated.
5700 
5701 OUTPUT PARAMETERS:
    IRow        -   array[N], I-th row.
5703 
5704 NOTE: this function has O(N) running time, where N is a  column  count. It
      allocates and fills an N-element array,  even  though  most  of  its
      elements are zero.
5707 
5708 NOTE: If you have O(non-zeros-per-row) time and memory  requirements,  use
5709       SparseGetCompressedRow() function. It  returns  data  in  compressed
5710       format.
5711 
5712 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
5713       is passed, this function throws exception.
5714 
5715   -- ALGLIB PROJECT --
5716      Copyright 10.12.2014 by Bochkanov Sergey
5717 *************************************************************************/
void sparsegetrow(const sparsematrix &s, const ae_int_t i, real_1d_array &irow, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsegetrow(): extracts row I of a
    // CRS/SKS matrix into irow (resized by the core if too small). Core
    // errors arrive via longjmp() into the setjmp() frame below and are
    // converted to a C++ exception (or an error flag under
    // AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsegetrow(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, const_cast<alglib_impl::ae_vector*>(irow.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5739 
5740 /*************************************************************************
5741 This function returns I-th row of the sparse matrix IN COMPRESSED FORMAT -
5742 only non-zero elements are returned (with their indexes). Matrix  must  be
5743 stored in CRS or SKS format.
5744 
5745 INPUT PARAMETERS:
5746     S           -   sparse M*N matrix in CRS format
5747     I           -   row index, 0<=I<M
5748     ColIdx      -   output buffer for column indexes, can be preallocated.
5749                     In case buffer size is too small to store I-th row, it
5750                     is automatically reallocated.
5751     Vals        -   output buffer for values, can be preallocated. In case
5752                     buffer size is too small to  store  I-th  row,  it  is
5753                     automatically reallocated.
5754 
5755 OUTPUT PARAMETERS:
5756     ColIdx      -   column   indexes   of  non-zero  elements,  sorted  by
5757                     ascending. Symbolically non-zero elements are  counted
5758                     (i.e. if you allocated place for element, but  it  has
5759                     zero numerical value - it is counted).
5760     Vals        -   values. Vals[K] stores value of  matrix  element  with
5761                     indexes (I,ColIdx[K]). Symbolically non-zero  elements
5762                     are counted (i.e. if you allocated place for  element,
5763                     but it has zero numerical value - it is counted).
5764     NZCnt       -   number of symbolically non-zero elements per row.
5765 
5766 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
5767       is passed, this function throws exception.
5768 
5769 NOTE: this function may allocate additional, unnecessary place for  ColIdx
5770       and Vals arrays. It is dictated by  performance  reasons  -  on  SKS
5771       matrices it is faster  to  allocate  space  at  the  beginning  with
5772       some "extra"-space, than performing two passes over matrix  -  first
5773       time to calculate exact space required for data, second  time  -  to
5774       store data itself.
5775 
5776   -- ALGLIB PROJECT --
5777      Copyright 10.12.2014 by Bochkanov Sergey
5778 *************************************************************************/
void sparsegetcompressedrow(const sparsematrix &s, const ae_int_t i, integer_1d_array &colidx, real_1d_array &vals, ae_int_t &nzcnt, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsegetcompressedrow(): returns row I
    // in compressed form — column indexes in colidx, values in vals,
    // count of symbolically non-zero entries in nzcnt. Core errors arrive
    // via longjmp() into the setjmp() frame below and become a C++
    // exception (or an error flag under AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsegetcompressedrow(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), i, const_cast<alglib_impl::ae_vector*>(colidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(vals.c_ptr()), &nzcnt, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5800 
5801 /*************************************************************************
5802 This function performs efficient in-place  transpose  of  SKS  matrix.  No
5803 additional memory is allocated during transposition.
5804 
5805 This function supports only skyline storage format (SKS).
5806 
5807 INPUT PARAMETERS
5808     S       -   sparse matrix in SKS format.
5809 
5810 OUTPUT PARAMETERS
5811     S           -   sparse matrix, transposed.
5812 
5813   -- ALGLIB PROJECT --
5814      Copyright 16.01.2014 by Bochkanov Sergey
5815 *************************************************************************/
sparsetransposesks(const sparsematrix & s,const xparams _xparams)5816 void sparsetransposesks(const sparsematrix &s, const xparams _xparams)
5817 {
5818     jmp_buf _break_jump;
5819     alglib_impl::ae_state _alglib_env_state;
5820     alglib_impl::ae_state_init(&_alglib_env_state);
5821     if( setjmp(_break_jump) )
5822     {
5823 #if !defined(AE_NO_EXCEPTIONS)
5824         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5825 #else
5826         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5827         return;
5828 #endif
5829     }
5830     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5831     if( _xparams.flags!=0x0 )
5832         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5833     alglib_impl::sparsetransposesks(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
5834     alglib_impl::ae_state_clear(&_alglib_env_state);
5835     return;
5836 }
5837 
5838 /*************************************************************************
5839 This function performs transpose of CRS matrix.
5840 
5841 INPUT PARAMETERS
5842     S       -   sparse matrix in CRS format.
5843 
5844 OUTPUT PARAMETERS
5845     S           -   sparse matrix, transposed.
5846 
5847 NOTE: internal  temporary  copy  is  allocated   for   the   purposes   of
5848       transposition. It is deallocated after transposition.
5849 
5850   -- ALGLIB PROJECT --
5851      Copyright 30.01.2018 by Bochkanov Sergey
5852 *************************************************************************/
sparsetransposecrs(const sparsematrix & s,const xparams _xparams)5853 void sparsetransposecrs(const sparsematrix &s, const xparams _xparams)
5854 {
5855     jmp_buf _break_jump;
5856     alglib_impl::ae_state _alglib_env_state;
5857     alglib_impl::ae_state_init(&_alglib_env_state);
5858     if( setjmp(_break_jump) )
5859     {
5860 #if !defined(AE_NO_EXCEPTIONS)
5861         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5862 #else
5863         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5864         return;
5865 #endif
5866     }
5867     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5868     if( _xparams.flags!=0x0 )
5869         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5870     alglib_impl::sparsetransposecrs(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
5871     alglib_impl::ae_state_clear(&_alglib_env_state);
5872     return;
5873 }
5874 
5875 /*************************************************************************
5876 This function performs copying with transposition of CRS matrix.
5877 
5878 INPUT PARAMETERS
5879     S0      -   sparse matrix in CRS format.
5880 
5881 OUTPUT PARAMETERS
5882     S1      -   sparse matrix, transposed
5883 
5884   -- ALGLIB PROJECT --
5885      Copyright 23.07.2018 by Bochkanov Sergey
5886 *************************************************************************/
void sparsecopytransposecrs(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparsecopytransposecrs(): writes the
    // transpose of CRS matrix s0 into a freshly allocated s1. Core errors
    // arrive via longjmp() into the setjmp() frame below and become a C++
    // exception (or an error flag under AE_NO_EXCEPTIONS).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecopytransposecrs(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5908 
5909 /*************************************************************************
5910 This function performs copying with transposition of CRS matrix  (buffered
5911 version which reuses memory already allocated by  the  target as  much  as
5912 possible).
5913 
5914 INPUT PARAMETERS
5915     S0      -   sparse matrix in CRS format.
5916 
5917 OUTPUT PARAMETERS
5918     S1      -   sparse matrix, transposed; previously allocated memory  is
5919                 reused if possible.
5920 
5921   -- ALGLIB PROJECT --
5922      Copyright 23.07.2018 by Bochkanov Sergey
5923 *************************************************************************/
void sparsecopytransposecrsbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
{
    // Buffered variant of sparsecopytransposecrs(): reuses memory already
    // allocated in s1. NOTE: s1 is taken by const reference but IS the
    // output (const is cast away below) — ALGLIB generated-wrapper
    // convention for buffered outputs.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // longjmp() target for errors raised inside the C core; converted to
    // a C++ exception (or an error flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparsecopytransposecrsbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5945 
5946 /*************************************************************************
5947 This  function  performs  in-place  conversion  to  desired sparse storage
5948 format.
5949 
5950 INPUT PARAMETERS
5951     S0      -   sparse matrix in any format.
5952     Fmt     -   desired storage format  of  the  output,  as  returned  by
5953                 SparseGetMatrixType() function:
5954                 * 0 for hash-based storage
5955                 * 1 for CRS
5956                 * 2 for SKS
5957 
5958 OUTPUT PARAMETERS
5959     S0          -   sparse matrix in requested format.
5960 
5961 NOTE: in-place conversion wastes a lot of memory which is  used  to  store
5962       temporaries.  If  you  perform  a  lot  of  repeated conversions, we
5963       recommend to use out-of-place buffered  conversion  functions,  like
5964       SparseCopyToBuf(), which can reuse already allocated memory.
5965 
5966   -- ALGLIB PROJECT --
5967      Copyright 16.01.2014 by Bochkanov Sergey
5968 *************************************************************************/
void sparseconvertto(const sparsematrix &s0, const ae_int_t fmt, const xparams _xparams)
{
    // C++ shim over alglib_impl::sparseconvertto(): converts s0 in place
    // to the storage format selected by fmt (0=hash, 1=CRS, 2=SKS, per
    // the header comment). NOTE: s0 is modified despite the const
    // reference — const is cast away below (ALGLIB wrapper convention).
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // longjmp() target for errors raised inside the C core; converted to
    // a C++ exception (or an error flag under AE_NO_EXCEPTIONS).
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    // Forward optional caller-supplied execution flags to the core.
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::sparseconvertto(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), fmt, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
5990 
5991 /*************************************************************************
5992 This  function  performs out-of-place conversion to desired sparse storage
5993 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
5994 S1 is reused to maximum extent possible.
5995 
5996 INPUT PARAMETERS
5997     S0      -   sparse matrix in any format.
5998     Fmt     -   desired storage format  of  the  output,  as  returned  by
5999                 SparseGetMatrixType() function:
6000                 * 0 for hash-based storage
6001                 * 1 for CRS
6002                 * 2 for SKS
6003 
6004 OUTPUT PARAMETERS
6005     S1          -   sparse matrix in requested format.
6006 
6007   -- ALGLIB PROJECT --
6008      Copyright 16.01.2014 by Bochkanov Sergey
6009 *************************************************************************/
sparsecopytobuf(const sparsematrix & s0,const ae_int_t fmt,const sparsematrix & s1,const xparams _xparams)6010 void sparsecopytobuf(const sparsematrix &s0, const ae_int_t fmt, const sparsematrix &s1, const xparams _xparams)
6011 {
6012     jmp_buf _break_jump;
6013     alglib_impl::ae_state _alglib_env_state;
6014     alglib_impl::ae_state_init(&_alglib_env_state);
6015     if( setjmp(_break_jump) )
6016     {
6017 #if !defined(AE_NO_EXCEPTIONS)
6018         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6019 #else
6020         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6021         return;
6022 #endif
6023     }
6024     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6025     if( _xparams.flags!=0x0 )
6026         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6027     alglib_impl::sparsecopytobuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), fmt, const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6028     alglib_impl::ae_state_clear(&_alglib_env_state);
6029     return;
6030 }
6031 
6032 /*************************************************************************
6033 This function performs in-place conversion to Hash table storage.
6034 
6035 INPUT PARAMETERS
6036     S           -   sparse matrix in CRS format.
6037 
6038 OUTPUT PARAMETERS
6039     S           -   sparse matrix in Hash table format.
6040 
6041 NOTE: this  function  has   no  effect  when  called with matrix which  is
6042       already in Hash table mode.
6043 
6044 NOTE: in-place conversion involves allocation of temporary arrays. If  you
6045       perform a lot of repeated in- place  conversions,  it  may  lead  to
6046       memory fragmentation. Consider using out-of-place SparseCopyToHashBuf()
6047       function in this case.
6048 
6049   -- ALGLIB PROJECT --
6050      Copyright 20.07.2012 by Bochkanov Sergey
6051 *************************************************************************/
sparseconverttohash(const sparsematrix & s,const xparams _xparams)6052 void sparseconverttohash(const sparsematrix &s, const xparams _xparams)
6053 {
6054     jmp_buf _break_jump;
6055     alglib_impl::ae_state _alglib_env_state;
6056     alglib_impl::ae_state_init(&_alglib_env_state);
6057     if( setjmp(_break_jump) )
6058     {
6059 #if !defined(AE_NO_EXCEPTIONS)
6060         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6061 #else
6062         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6063         return;
6064 #endif
6065     }
6066     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6067     if( _xparams.flags!=0x0 )
6068         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6069     alglib_impl::sparseconverttohash(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6070     alglib_impl::ae_state_clear(&_alglib_env_state);
6071     return;
6072 }
6073 
6074 /*************************************************************************
6075 This  function  performs  out-of-place  conversion  to  Hash table storage
6076 format. S0 is copied to S1 and converted on-the-fly.
6077 
6078 INPUT PARAMETERS
6079     S0          -   sparse matrix in any format.
6080 
6081 OUTPUT PARAMETERS
6082     S1          -   sparse matrix in Hash table format.
6083 
6084 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
6085 
6086 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
6087       conversion. If you perform a  lot  of  repeated  conversions, it may
6088       lead to memory fragmentation. In this case we recommend you  to  use
6089       SparseCopyToHashBuf() function which re-uses memory in S1 as much as
6090       possible.
6091 
6092   -- ALGLIB PROJECT --
6093      Copyright 20.07.2012 by Bochkanov Sergey
6094 *************************************************************************/
sparsecopytohash(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)6095 void sparsecopytohash(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
6096 {
6097     jmp_buf _break_jump;
6098     alglib_impl::ae_state _alglib_env_state;
6099     alglib_impl::ae_state_init(&_alglib_env_state);
6100     if( setjmp(_break_jump) )
6101     {
6102 #if !defined(AE_NO_EXCEPTIONS)
6103         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6104 #else
6105         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6106         return;
6107 #endif
6108     }
6109     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6110     if( _xparams.flags!=0x0 )
6111         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6112     alglib_impl::sparsecopytohash(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6113     alglib_impl::ae_state_clear(&_alglib_env_state);
6114     return;
6115 }
6116 
6117 /*************************************************************************
6118 This  function  performs  out-of-place  conversion  to  Hash table storage
6119 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
6120 S1 is reused to maximum extent possible.
6121 
6122 INPUT PARAMETERS
6123     S0          -   sparse matrix in any format.
6124 
6125 OUTPUT PARAMETERS
6126     S1          -   sparse matrix in Hash table format.
6127 
6128 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
6129 
6130   -- ALGLIB PROJECT --
6131      Copyright 20.07.2012 by Bochkanov Sergey
6132 *************************************************************************/
sparsecopytohashbuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)6133 void sparsecopytohashbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
6134 {
6135     jmp_buf _break_jump;
6136     alglib_impl::ae_state _alglib_env_state;
6137     alglib_impl::ae_state_init(&_alglib_env_state);
6138     if( setjmp(_break_jump) )
6139     {
6140 #if !defined(AE_NO_EXCEPTIONS)
6141         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6142 #else
6143         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6144         return;
6145 #endif
6146     }
6147     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6148     if( _xparams.flags!=0x0 )
6149         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6150     alglib_impl::sparsecopytohashbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6151     alglib_impl::ae_state_clear(&_alglib_env_state);
6152     return;
6153 }
6154 
6155 /*************************************************************************
6156 This function converts matrix to CRS format.
6157 
6158 Some  algorithms  (linear  algebra ones, for example) require matrices in
6159 CRS format. This function allows to perform in-place conversion.
6160 
6161 INPUT PARAMETERS
6162     S           -   sparse M*N matrix in any format
6163 
6164 OUTPUT PARAMETERS
6165     S           -   matrix in CRS format
6166 
6167 NOTE: this   function  has  no  effect  when  called with matrix which is
6168       already in CRS mode.
6169 
6170 NOTE: this function allocates temporary memory to store a   copy  of  the
6171       matrix. If you perform a lot of repeated conversions, we  recommend
6172       you  to  use  SparseCopyToCRSBuf()  function,   which   can   reuse
6173       previously allocated memory.
6174 
6175   -- ALGLIB PROJECT --
6176      Copyright 14.10.2011 by Bochkanov Sergey
6177 *************************************************************************/
sparseconverttocrs(const sparsematrix & s,const xparams _xparams)6178 void sparseconverttocrs(const sparsematrix &s, const xparams _xparams)
6179 {
6180     jmp_buf _break_jump;
6181     alglib_impl::ae_state _alglib_env_state;
6182     alglib_impl::ae_state_init(&_alglib_env_state);
6183     if( setjmp(_break_jump) )
6184     {
6185 #if !defined(AE_NO_EXCEPTIONS)
6186         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6187 #else
6188         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6189         return;
6190 #endif
6191     }
6192     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6193     if( _xparams.flags!=0x0 )
6194         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6195     alglib_impl::sparseconverttocrs(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6196     alglib_impl::ae_state_clear(&_alglib_env_state);
6197     return;
6198 }
6199 
6200 /*************************************************************************
6201 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
6202 copied to S1 and converted on-the-fly.
6203 
6204 INPUT PARAMETERS
6205     S0          -   sparse matrix in any format.
6206 
6207 OUTPUT PARAMETERS
6208     S1          -   sparse matrix in CRS format.
6209 
6210 NOTE: if S0 is stored as CRS, it is just copied without conversion.
6211 
6212 NOTE: this function de-allocates memory occupied by S1 before starting CRS
6213       conversion. If you perform a lot of repeated CRS conversions, it may
6214       lead to memory fragmentation. In this case we recommend you  to  use
6215       SparseCopyToCRSBuf() function which re-uses memory in S1 as much  as
6216       possible.
6217 
6218   -- ALGLIB PROJECT --
6219      Copyright 20.07.2012 by Bochkanov Sergey
6220 *************************************************************************/
sparsecopytocrs(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)6221 void sparsecopytocrs(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
6222 {
6223     jmp_buf _break_jump;
6224     alglib_impl::ae_state _alglib_env_state;
6225     alglib_impl::ae_state_init(&_alglib_env_state);
6226     if( setjmp(_break_jump) )
6227     {
6228 #if !defined(AE_NO_EXCEPTIONS)
6229         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6230 #else
6231         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6232         return;
6233 #endif
6234     }
6235     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6236     if( _xparams.flags!=0x0 )
6237         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6238     alglib_impl::sparsecopytocrs(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6239     alglib_impl::ae_state_clear(&_alglib_env_state);
6240     return;
6241 }
6242 
6243 /*************************************************************************
6244 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
6245 copied to S1 and converted on-the-fly. Memory allocated in S1 is reused to
6246 maximum extent possible.
6247 
6248 INPUT PARAMETERS
6249     S0          -   sparse matrix in any format.
6250     S1          -   matrix which may contain some pre-allocated memory, or
6251                     can be just uninitialized structure.
6252 
6253 OUTPUT PARAMETERS
6254     S1          -   sparse matrix in CRS format.
6255 
6256 NOTE: if S0 is stored as CRS, it is just copied without conversion.
6257 
6258   -- ALGLIB PROJECT --
6259      Copyright 20.07.2012 by Bochkanov Sergey
6260 *************************************************************************/
sparsecopytocrsbuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)6261 void sparsecopytocrsbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
6262 {
6263     jmp_buf _break_jump;
6264     alglib_impl::ae_state _alglib_env_state;
6265     alglib_impl::ae_state_init(&_alglib_env_state);
6266     if( setjmp(_break_jump) )
6267     {
6268 #if !defined(AE_NO_EXCEPTIONS)
6269         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6270 #else
6271         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6272         return;
6273 #endif
6274     }
6275     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6276     if( _xparams.flags!=0x0 )
6277         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6278     alglib_impl::sparsecopytocrsbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6279     alglib_impl::ae_state_clear(&_alglib_env_state);
6280     return;
6281 }
6282 
6283 /*************************************************************************
6284 This function performs in-place conversion to SKS format.
6285 
6286 INPUT PARAMETERS
6287     S           -   sparse matrix in any format.
6288 
6289 OUTPUT PARAMETERS
6290     S           -   sparse matrix in SKS format.
6291 
6292 NOTE: this  function  has   no  effect  when  called with matrix which  is
6293       already in SKS mode.
6294 
6295 NOTE: in-place conversion involves allocation of temporary arrays. If  you
6296       perform a lot of repeated in- place  conversions,  it  may  lead  to
6297       memory fragmentation. Consider using out-of-place SparseCopyToSKSBuf()
6298       function in this case.
6299 
6300   -- ALGLIB PROJECT --
6301      Copyright 15.01.2014 by Bochkanov Sergey
6302 *************************************************************************/
sparseconverttosks(const sparsematrix & s,const xparams _xparams)6303 void sparseconverttosks(const sparsematrix &s, const xparams _xparams)
6304 {
6305     jmp_buf _break_jump;
6306     alglib_impl::ae_state _alglib_env_state;
6307     alglib_impl::ae_state_init(&_alglib_env_state);
6308     if( setjmp(_break_jump) )
6309     {
6310 #if !defined(AE_NO_EXCEPTIONS)
6311         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6312 #else
6313         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6314         return;
6315 #endif
6316     }
6317     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6318     if( _xparams.flags!=0x0 )
6319         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6320     alglib_impl::sparseconverttosks(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6321     alglib_impl::ae_state_clear(&_alglib_env_state);
6322     return;
6323 }
6324 
6325 /*************************************************************************
6326 This  function  performs  out-of-place  conversion  to SKS storage format.
6327 S0 is copied to S1 and converted on-the-fly.
6328 
6329 INPUT PARAMETERS
6330     S0          -   sparse matrix in any format.
6331 
6332 OUTPUT PARAMETERS
6333     S1          -   sparse matrix in SKS format.
6334 
6335 NOTE: if S0 is stored as SKS, it is just copied without conversion.
6336 
6337 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
6338       conversion. If you perform a  lot  of  repeated  conversions, it may
6339       lead to memory fragmentation. In this case we recommend you  to  use
6340       SparseCopyToSKSBuf() function which re-uses memory in S1 as much  as
6341       possible.
6342 
6343   -- ALGLIB PROJECT --
6344      Copyright 20.07.2012 by Bochkanov Sergey
6345 *************************************************************************/
sparsecopytosks(const sparsematrix & s0,sparsematrix & s1,const xparams _xparams)6346 void sparsecopytosks(const sparsematrix &s0, sparsematrix &s1, const xparams _xparams)
6347 {
6348     jmp_buf _break_jump;
6349     alglib_impl::ae_state _alglib_env_state;
6350     alglib_impl::ae_state_init(&_alglib_env_state);
6351     if( setjmp(_break_jump) )
6352     {
6353 #if !defined(AE_NO_EXCEPTIONS)
6354         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6355 #else
6356         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6357         return;
6358 #endif
6359     }
6360     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6361     if( _xparams.flags!=0x0 )
6362         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6363     alglib_impl::sparsecopytosks(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6364     alglib_impl::ae_state_clear(&_alglib_env_state);
6365     return;
6366 }
6367 
6368 /*************************************************************************
6369 This  function  performs  out-of-place  conversion  to SKS format.  S0  is
6370 copied to S1 and converted on-the-fly. Memory  allocated  in S1 is  reused
6371 to maximum extent possible.
6372 
6373 INPUT PARAMETERS
6374     S0          -   sparse matrix in any format.
6375 
6376 OUTPUT PARAMETERS
6377     S1          -   sparse matrix in SKS format.
6378 
6379 NOTE: if S0 is stored as SKS, it is just copied without conversion.
6380 
6381   -- ALGLIB PROJECT --
6382      Copyright 20.07.2012 by Bochkanov Sergey
6383 *************************************************************************/
sparsecopytosksbuf(const sparsematrix & s0,const sparsematrix & s1,const xparams _xparams)6384 void sparsecopytosksbuf(const sparsematrix &s0, const sparsematrix &s1, const xparams _xparams)
6385 {
6386     jmp_buf _break_jump;
6387     alglib_impl::ae_state _alglib_env_state;
6388     alglib_impl::ae_state_init(&_alglib_env_state);
6389     if( setjmp(_break_jump) )
6390     {
6391 #if !defined(AE_NO_EXCEPTIONS)
6392         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6393 #else
6394         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6395         return;
6396 #endif
6397     }
6398     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6399     if( _xparams.flags!=0x0 )
6400         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6401     alglib_impl::sparsecopytosksbuf(const_cast<alglib_impl::sparsematrix*>(s0.c_ptr()), const_cast<alglib_impl::sparsematrix*>(s1.c_ptr()), &_alglib_env_state);
6402     alglib_impl::ae_state_clear(&_alglib_env_state);
6403     return;
6404 }
6405 
6406 /*************************************************************************
6407 This function returns type of the matrix storage format.
6408 
6409 INPUT PARAMETERS:
6410     S           -   sparse matrix.
6411 
6412 RESULT:
6413     sparse storage format used by matrix:
6414         0   -   Hash-table
6415         1   -   CRS (compressed row storage)
6416         2   -   SKS (skyline)
6417 
6418 NOTE: future  versions  of  ALGLIB  may  include additional sparse storage
6419       formats.
6420 
6421 
6422   -- ALGLIB PROJECT --
6423      Copyright 20.07.2012 by Bochkanov Sergey
6424 *************************************************************************/
sparsegetmatrixtype(const sparsematrix & s,const xparams _xparams)6425 ae_int_t sparsegetmatrixtype(const sparsematrix &s, const xparams _xparams)
6426 {
6427     jmp_buf _break_jump;
6428     alglib_impl::ae_state _alglib_env_state;
6429     alglib_impl::ae_state_init(&_alglib_env_state);
6430     if( setjmp(_break_jump) )
6431     {
6432 #if !defined(AE_NO_EXCEPTIONS)
6433         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6434 #else
6435         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6436         return 0;
6437 #endif
6438     }
6439     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6440     if( _xparams.flags!=0x0 )
6441         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6442     alglib_impl::ae_int_t result = alglib_impl::sparsegetmatrixtype(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6443     alglib_impl::ae_state_clear(&_alglib_env_state);
6444     return *(reinterpret_cast<ae_int_t*>(&result));
6445 }
6446 
6447 /*************************************************************************
6448 This function checks matrix storage format and returns True when matrix is
6449 stored using Hash table representation.
6450 
6451 INPUT PARAMETERS:
6452     S   -   sparse matrix.
6453 
6454 RESULT:
6455     True if matrix type is Hash table
6456     False if matrix type is not Hash table
6457 
6458   -- ALGLIB PROJECT --
6459      Copyright 20.07.2012 by Bochkanov Sergey
6460 *************************************************************************/
sparseishash(const sparsematrix & s,const xparams _xparams)6461 bool sparseishash(const sparsematrix &s, const xparams _xparams)
6462 {
6463     jmp_buf _break_jump;
6464     alglib_impl::ae_state _alglib_env_state;
6465     alglib_impl::ae_state_init(&_alglib_env_state);
6466     if( setjmp(_break_jump) )
6467     {
6468 #if !defined(AE_NO_EXCEPTIONS)
6469         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6470 #else
6471         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6472         return 0;
6473 #endif
6474     }
6475     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6476     if( _xparams.flags!=0x0 )
6477         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6478     ae_bool result = alglib_impl::sparseishash(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6479     alglib_impl::ae_state_clear(&_alglib_env_state);
6480     return *(reinterpret_cast<bool*>(&result));
6481 }
6482 
6483 /*************************************************************************
6484 This function checks matrix storage format and returns True when matrix is
6485 stored using CRS representation.
6486 
6487 INPUT PARAMETERS:
6488     S   -   sparse matrix.
6489 
6490 RESULT:
6491     True if matrix type is CRS
6492     False if matrix type is not CRS
6493 
6494   -- ALGLIB PROJECT --
6495      Copyright 20.07.2012 by Bochkanov Sergey
6496 *************************************************************************/
sparseiscrs(const sparsematrix & s,const xparams _xparams)6497 bool sparseiscrs(const sparsematrix &s, const xparams _xparams)
6498 {
6499     jmp_buf _break_jump;
6500     alglib_impl::ae_state _alglib_env_state;
6501     alglib_impl::ae_state_init(&_alglib_env_state);
6502     if( setjmp(_break_jump) )
6503     {
6504 #if !defined(AE_NO_EXCEPTIONS)
6505         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6506 #else
6507         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6508         return 0;
6509 #endif
6510     }
6511     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6512     if( _xparams.flags!=0x0 )
6513         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6514     ae_bool result = alglib_impl::sparseiscrs(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6515     alglib_impl::ae_state_clear(&_alglib_env_state);
6516     return *(reinterpret_cast<bool*>(&result));
6517 }
6518 
6519 /*************************************************************************
6520 This function checks matrix storage format and returns True when matrix is
6521 stored using SKS representation.
6522 
6523 INPUT PARAMETERS:
6524     S   -   sparse matrix.
6525 
6526 RESULT:
6527     True if matrix type is SKS
6528     False if matrix type is not SKS
6529 
6530   -- ALGLIB PROJECT --
6531      Copyright 20.07.2012 by Bochkanov Sergey
6532 *************************************************************************/
sparseissks(const sparsematrix & s,const xparams _xparams)6533 bool sparseissks(const sparsematrix &s, const xparams _xparams)
6534 {
6535     jmp_buf _break_jump;
6536     alglib_impl::ae_state _alglib_env_state;
6537     alglib_impl::ae_state_init(&_alglib_env_state);
6538     if( setjmp(_break_jump) )
6539     {
6540 #if !defined(AE_NO_EXCEPTIONS)
6541         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6542 #else
6543         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6544         return 0;
6545 #endif
6546     }
6547     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6548     if( _xparams.flags!=0x0 )
6549         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6550     ae_bool result = alglib_impl::sparseissks(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6551     alglib_impl::ae_state_clear(&_alglib_env_state);
6552     return *(reinterpret_cast<bool*>(&result));
6553 }
6554 
6555 /*************************************************************************
6556 The function frees all memory occupied by  sparse  matrix.  Sparse  matrix
6557 structure becomes unusable after this call.
6558 
6559 OUTPUT PARAMETERS
6560     S   -   sparse matrix to delete
6561 
6562   -- ALGLIB PROJECT --
6563      Copyright 24.07.2012 by Bochkanov Sergey
6564 *************************************************************************/
sparsefree(sparsematrix & s,const xparams _xparams)6565 void sparsefree(sparsematrix &s, const xparams _xparams)
6566 {
6567     jmp_buf _break_jump;
6568     alglib_impl::ae_state _alglib_env_state;
6569     alglib_impl::ae_state_init(&_alglib_env_state);
6570     if( setjmp(_break_jump) )
6571     {
6572 #if !defined(AE_NO_EXCEPTIONS)
6573         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6574 #else
6575         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6576         return;
6577 #endif
6578     }
6579     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6580     if( _xparams.flags!=0x0 )
6581         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6582     alglib_impl::sparsefree(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6583     alglib_impl::ae_state_clear(&_alglib_env_state);
6584     return;
6585 }
6586 
6587 /*************************************************************************
6588 The function returns number of rows of a sparse matrix.
6589 
6590 RESULT: number of rows of a sparse matrix.
6591 
6592   -- ALGLIB PROJECT --
6593      Copyright 23.08.2012 by Bochkanov Sergey
6594 *************************************************************************/
sparsegetnrows(const sparsematrix & s,const xparams _xparams)6595 ae_int_t sparsegetnrows(const sparsematrix &s, const xparams _xparams)
6596 {
6597     jmp_buf _break_jump;
6598     alglib_impl::ae_state _alglib_env_state;
6599     alglib_impl::ae_state_init(&_alglib_env_state);
6600     if( setjmp(_break_jump) )
6601     {
6602 #if !defined(AE_NO_EXCEPTIONS)
6603         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6604 #else
6605         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6606         return 0;
6607 #endif
6608     }
6609     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6610     if( _xparams.flags!=0x0 )
6611         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6612     alglib_impl::ae_int_t result = alglib_impl::sparsegetnrows(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6613     alglib_impl::ae_state_clear(&_alglib_env_state);
6614     return *(reinterpret_cast<ae_int_t*>(&result));
6615 }
6616 
6617 /*************************************************************************
6618 The function returns number of columns of a sparse matrix.
6619 
6620 RESULT: number of columns of a sparse matrix.
6621 
6622   -- ALGLIB PROJECT --
6623      Copyright 23.08.2012 by Bochkanov Sergey
6624 *************************************************************************/
sparsegetncols(const sparsematrix & s,const xparams _xparams)6625 ae_int_t sparsegetncols(const sparsematrix &s, const xparams _xparams)
6626 {
6627     jmp_buf _break_jump;
6628     alglib_impl::ae_state _alglib_env_state;
6629     alglib_impl::ae_state_init(&_alglib_env_state);
6630     if( setjmp(_break_jump) )
6631     {
6632 #if !defined(AE_NO_EXCEPTIONS)
6633         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6634 #else
6635         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6636         return 0;
6637 #endif
6638     }
6639     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6640     if( _xparams.flags!=0x0 )
6641         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6642     alglib_impl::ae_int_t result = alglib_impl::sparsegetncols(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6643     alglib_impl::ae_state_clear(&_alglib_env_state);
6644     return *(reinterpret_cast<ae_int_t*>(&result));
6645 }
6646 
6647 /*************************************************************************
6648 The function returns number of strictly upper triangular non-zero elements
6649 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
6650 in the sparse matrix data structure. If some element  has  zero  numerical
6651 value, it is still counted.
6652 
6653 This function has different cost for different types of matrices:
6654 * for hash-based matrices it involves complete pass over entire hash-table
6655   with O(NNZ) cost, where NNZ is number of non-zero elements
6656 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
6657 
6658 RESULT: number of non-zero elements strictly above main diagonal
6659 
6660   -- ALGLIB PROJECT --
6661      Copyright 12.02.2014 by Bochkanov Sergey
6662 *************************************************************************/
sparsegetuppercount(const sparsematrix & s,const xparams _xparams)6663 ae_int_t sparsegetuppercount(const sparsematrix &s, const xparams _xparams)
6664 {
6665     jmp_buf _break_jump;
6666     alglib_impl::ae_state _alglib_env_state;
6667     alglib_impl::ae_state_init(&_alglib_env_state);
6668     if( setjmp(_break_jump) )
6669     {
6670 #if !defined(AE_NO_EXCEPTIONS)
6671         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6672 #else
6673         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6674         return 0;
6675 #endif
6676     }
6677     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6678     if( _xparams.flags!=0x0 )
6679         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6680     alglib_impl::ae_int_t result = alglib_impl::sparsegetuppercount(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6681     alglib_impl::ae_state_clear(&_alglib_env_state);
6682     return *(reinterpret_cast<ae_int_t*>(&result));
6683 }
6684 
6685 /*************************************************************************
6686 The function returns number of strictly lower triangular non-zero elements
6687 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
6688 in the sparse matrix data structure. If some element  has  zero  numerical
6689 value, it is still counted.
6690 
6691 This function has different cost for different types of matrices:
6692 * for hash-based matrices it involves complete pass over entire hash-table
6693   with O(NNZ) cost, where NNZ is number of non-zero elements
6694 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
6695 
6696 RESULT: number of non-zero elements strictly below main diagonal
6697 
6698   -- ALGLIB PROJECT --
6699      Copyright 12.02.2014 by Bochkanov Sergey
6700 *************************************************************************/
sparsegetlowercount(const sparsematrix & s,const xparams _xparams)6701 ae_int_t sparsegetlowercount(const sparsematrix &s, const xparams _xparams)
6702 {
6703     jmp_buf _break_jump;
6704     alglib_impl::ae_state _alglib_env_state;
6705     alglib_impl::ae_state_init(&_alglib_env_state);
6706     if( setjmp(_break_jump) )
6707     {
6708 #if !defined(AE_NO_EXCEPTIONS)
6709         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6710 #else
6711         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6712         return 0;
6713 #endif
6714     }
6715     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6716     if( _xparams.flags!=0x0 )
6717         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6718     alglib_impl::ae_int_t result = alglib_impl::sparsegetlowercount(const_cast<alglib_impl::sparsematrix*>(s.c_ptr()), &_alglib_env_state);
6719     alglib_impl::ae_state_clear(&_alglib_env_state);
6720     return *(reinterpret_cast<ae_int_t*>(&result));
6721 }
6722 #endif
6723 
6724 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
6725 
6726 #endif
6727 
6728 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
6729 /*************************************************************************
6730 This object stores state of the subspace iteration algorithm.
6731 
6732 You should use ALGLIB functions to work with this object.
6733 *************************************************************************/
// Default constructor: allocates and initializes the wrapped C-core
// eigsubspacestate structure (p_struct).
_eigsubspacestate_owner::_eigsubspacestate_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: on failure inside the C core, control longjmp()s back here;
    // any partially constructed storage is released before reporting the error.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacestate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct is cleared BEFORE allocation so the error trap above can tell
    // whether there is anything to free.
    p_struct = NULL;
    p_struct = (alglib_impl::eigsubspacestate*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacestate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacestate));
    alglib_impl::_eigsubspacestate_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
6762 
// Copy constructor: deep-copies rhs's C-core structure into freshly
// allocated storage; rhs must already be initialized.
_eigsubspacestate_owner::_eigsubspacestate_owner(const _eigsubspacestate_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: free partially constructed storage, then rethrow/flag.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacestate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacestate copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::eigsubspacestate*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacestate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacestate));
    alglib_impl::_eigsubspacestate_init_copy(p_struct, const_cast<alglib_impl::eigsubspacestate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
6792 
// Copy assignment: destroys the current C-core contents in place and
// re-initializes them as a deep copy of rhs (storage is reused, not
// reallocated). Both sides must already be initialized.
_eigsubspacestate_owner& _eigsubspacestate_owner::operator=(const _eigsubspacestate_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap for failures raised by the C core during the copy.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: eigsubspacestate assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacestate assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_eigsubspacestate_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacestate));
    alglib_impl::_eigsubspacestate_init_copy(p_struct, const_cast<alglib_impl::eigsubspacestate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
6819 
~_eigsubspacestate_owner()6820 _eigsubspacestate_owner::~_eigsubspacestate_owner()
6821 {
6822     if( p_struct!=NULL )
6823     {
6824         alglib_impl::_eigsubspacestate_destroy(p_struct);
6825         ae_free(p_struct);
6826     }
6827 }
6828 
// Returns a pointer to the wrapped C-core structure (non-owning).
alglib_impl::eigsubspacestate* _eigsubspacestate_owner::c_ptr()
{
    return p_struct;
}
6833 
c_ptr() const6834 alglib_impl::eigsubspacestate* _eigsubspacestate_owner::c_ptr() const
6835 {
6836     return const_cast<alglib_impl::eigsubspacestate*>(p_struct);
6837 }
// Default constructor: the owner base class allocates and initializes p_struct.
eigsubspacestate::eigsubspacestate() : _eigsubspacestate_owner()
{
}
6841 
// Copy constructor: the owner base class deep-copies rhs's C-core structure.
eigsubspacestate::eigsubspacestate(const eigsubspacestate &rhs):_eigsubspacestate_owner(rhs)
{
}
6845 
// Copy assignment: delegate the deep copy to the owner base class;
// self-assignment is a no-op.
eigsubspacestate& eigsubspacestate::operator=(const eigsubspacestate &rhs)
{
    if( this!=&rhs )
        _eigsubspacestate_owner::operator=(rhs);
    return *this;
}
6853 
// Destructor: cleanup is performed by the owner base-class destructor.
eigsubspacestate::~eigsubspacestate()
{
}
6857 
6858 
6859 /*************************************************************************
6860 This object stores state of the subspace iteration algorithm.
6861 
6862 You should use ALGLIB functions to work with this object.
6863 *************************************************************************/
// Default constructor: allocates and initializes the wrapped C-core
// eigsubspacereport structure (p_struct).
_eigsubspacereport_owner::_eigsubspacereport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: on failure inside the C core, control longjmp()s back here;
    // any partially constructed storage is released before reporting the error.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacereport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct is cleared BEFORE allocation so the error trap above can tell
    // whether there is anything to free.
    p_struct = NULL;
    p_struct = (alglib_impl::eigsubspacereport*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacereport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacereport));
    alglib_impl::_eigsubspacereport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
6892 
// Copy constructor: deep-copies rhs's C-core structure into freshly
// allocated storage; rhs must already be initialized.
_eigsubspacereport_owner::_eigsubspacereport_owner(const _eigsubspacereport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap: free partially constructed storage, then rethrow/flag.
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_eigsubspacereport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacereport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::eigsubspacereport*)alglib_impl::ae_malloc(sizeof(alglib_impl::eigsubspacereport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacereport));
    alglib_impl::_eigsubspacereport_init_copy(p_struct, const_cast<alglib_impl::eigsubspacereport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
6922 
// Copy assignment: destroys the current C-core contents in place and
// re-initializes them as a deep copy of rhs (storage is reused, not
// reallocated). Both sides must already be initialized.
_eigsubspacereport_owner& _eigsubspacereport_owner::operator=(const _eigsubspacereport_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    // Error trap for failures raised by the C core during the copy.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: eigsubspacereport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: eigsubspacereport assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_eigsubspacereport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::eigsubspacereport));
    alglib_impl::_eigsubspacereport_init_copy(p_struct, const_cast<alglib_impl::eigsubspacereport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
6949 
~_eigsubspacereport_owner()6950 _eigsubspacereport_owner::~_eigsubspacereport_owner()
6951 {
6952     if( p_struct!=NULL )
6953     {
6954         alglib_impl::_eigsubspacereport_destroy(p_struct);
6955         ae_free(p_struct);
6956     }
6957 }
6958 
// Returns a pointer to the wrapped C-core structure (non-owning).
alglib_impl::eigsubspacereport* _eigsubspacereport_owner::c_ptr()
{
    return p_struct;
}
6963 
c_ptr() const6964 alglib_impl::eigsubspacereport* _eigsubspacereport_owner::c_ptr() const
6965 {
6966     return const_cast<alglib_impl::eigsubspacereport*>(p_struct);
6967 }
// Default constructor: the owner base class allocates p_struct, then the
// public iterationscount field is bound as a reference into the C-core
// structure so reads/writes go straight through.
eigsubspacereport::eigsubspacereport() : _eigsubspacereport_owner() ,iterationscount(p_struct->iterationscount)
{
}
6971 
// Copy constructor: the owner base class deep-copies rhs's C-core structure;
// iterationscount is re-bound into this object's own p_struct.
eigsubspacereport::eigsubspacereport(const eigsubspacereport &rhs):_eigsubspacereport_owner(rhs) ,iterationscount(p_struct->iterationscount)
{
}
6975 
// Copy assignment: delegate the deep copy to the owner base class;
// self-assignment is a no-op. The iterationscount reference member keeps
// pointing into this object's own p_struct, so no re-binding is needed.
eigsubspacereport& eigsubspacereport::operator=(const eigsubspacereport &rhs)
{
    if( this!=&rhs )
        _eigsubspacereport_owner::operator=(rhs);
    return *this;
}
6983 
// Destructor: cleanup is performed by the owner base-class destructor.
eigsubspacereport::~eigsubspacereport()
{
}
6987 
6988 /*************************************************************************
6989 This function initializes subspace iteration solver. This solver  is  used
6990 to solve symmetric real eigenproblems where just a few (top K) eigenvalues
and corresponding eigenvectors are required.
6992 
6993 This solver can be significantly faster than  complete  EVD  decomposition
6994 in the following case:
6995 * when only just a small fraction  of  top  eigenpairs  of dense matrix is
6996   required. When K approaches N, this solver is slower than complete dense
6997   EVD
6998 * when problem matrix is sparse (and/or is not known explicitly, i.e. only
6999   matrix-matrix product can be performed)
7000 
7001 USAGE (explicit dense/sparse matrix):
7002 1. User initializes algorithm state with eigsubspacecreate() call
7003 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
7004    or other functions
7005 3. User  calls  eigsubspacesolvedense() or eigsubspacesolvesparse() methods,
7006    which take algorithm state and 2D array or alglib.sparsematrix object.
7007 
7008 USAGE (out-of-core mode):
7009 1. User initializes algorithm state with eigsubspacecreate() call
7010 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
7011    or other functions
7012 3. User activates out-of-core mode of  the  solver  and  repeatedly  calls
7013    communication functions in a loop like below:
7014    > alglib.eigsubspaceoocstart(state)
7015    > while alglib.eigsubspaceooccontinue(state) do
7016    >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7017    >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7018    >     [calculate  Y=A*X, with X=R^NxM]
7019    >     alglib.eigsubspaceoocsendresult(state, in Y)
7020    > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7021 
7022 INPUT PARAMETERS:
7023     N       -   problem dimensionality, N>0
    K       -   number of top eigenvectors to calculate, 0<K<=N.
7025 
7026 OUTPUT PARAMETERS:
7027     State   -   structure which stores algorithm state
7028 
7029 NOTE: if you solve many similar EVD problems you may  find  it  useful  to
7030       reuse previous subspace as warm-start point for new EVD problem.  It
7031       can be done with eigsubspacesetwarmstart() function.
7032 
7033   -- ALGLIB --
7034      Copyright 16.01.2017 by Bochkanov Sergey
7035 *************************************************************************/
void eigsubspacecreate(const ae_int_t n, const ae_int_t k, eigsubspacestate &state, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);    // rethrow as C++ exception
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);   // exception-free build: record error flag
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core.
    alglib_impl::eigsubspacecreate(n, k, const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7057 
7058 /*************************************************************************
7059 Buffered version of constructor which aims to reuse  previously  allocated
7060 memory as much as possible.
7061 
7062   -- ALGLIB --
7063      Copyright 16.01.2017 by Bochkanov Sergey
7064 *************************************************************************/
// NOTE: state is declared const but is in fact an output parameter (the
// generated wrapper strips const via const_cast before calling the core).
void eigsubspacecreatebuf(const ae_int_t n, const ae_int_t k, const eigsubspacestate &state, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (reuses previously allocated memory).
    alglib_impl::eigsubspacecreatebuf(n, k, const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7086 
7087 /*************************************************************************
This function sets stopping criteria for the solver:
7089 * error in eigenvector/value allowed by solver
7090 * maximum number of iterations to perform
7091 
7092 INPUT PARAMETERS:
7093     State       -   solver structure
7094     Eps         -   eps>=0,  with non-zero value used to tell solver  that
7095                     it can  stop  after  all  eigenvalues  converged  with
7096                     error  roughly  proportional  to  eps*MAX(LAMBDA_MAX),
7097                     where LAMBDA_MAX is a maximum eigenvalue.
7098                     Zero  value  means  that  no  check  for  precision is
7099                     performed.
7100     MaxIts      -   maxits>=0,  with non-zero value used  to  tell  solver
7101                     that it can stop after maxits  steps  (no  matter  how
7102                     precise current estimate is)
7103 
7104 NOTE: passing  eps=0  and  maxits=0  results  in  automatic  selection  of
7105       moderate eps as stopping criteria (1.0E-6 in current implementation,
7106       but it may change without notice).
7107 
7108 NOTE: very small values of eps are possible (say, 1.0E-12),  although  the
7109       larger problem you solve (N and/or K), the  harder  it  is  to  find
7110       precise eigenvectors because rounding errors tend to accumulate.
7111 
7112 NOTE: passing non-zero eps results in  some performance  penalty,  roughly
7113       equal to 2N*(2K)^2 FLOPs per iteration. These additional computations
7114       are required in order to estimate current error in  eigenvalues  via
7115       Rayleigh-Ritz process.
7116       Most of this additional time is  spent  in  construction  of  ~2Kx2K
7117       symmetric  subproblem  whose  eigenvalues  are  checked  with  exact
7118       eigensolver.
7119       This additional time is negligible if you search for eigenvalues  of
7120       the large dense matrix, but may become noticeable on  highly  sparse
7121       EVD problems, where cost of matrix-matrix product is low.
7122       If you set eps to exactly zero,  Rayleigh-Ritz  phase  is completely
7123       turned off.
7124 
7125   -- ALGLIB --
7126      Copyright 16.01.2017 by Bochkanov Sergey
7127 *************************************************************************/
void eigsubspacesetcond(const eigsubspacestate &state, const double eps, const ae_int_t maxits, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (stores eps/maxits in the solver state).
    alglib_impl::eigsubspacesetcond(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), eps, maxits, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7149 
7150 /*************************************************************************
7151 This function sets warm-start mode of the solver: next call to the  solver
7152 will reuse previous subspace as warm-start  point.  It  can  significantly
7153 speed-up convergence when you solve many similar eigenproblems.
7154 
7155 INPUT PARAMETERS:
7156     State       -   solver structure
7157     UseWarmStart-   either True or False
7158 
7159   -- ALGLIB --
7160      Copyright 12.11.2017 by Bochkanov Sergey
7161 *************************************************************************/
void eigsubspacesetwarmstart(const eigsubspacestate &state, const bool usewarmstart, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (toggles warm-start mode in the state).
    alglib_impl::eigsubspacesetwarmstart(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), usewarmstart, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7183 
7184 /*************************************************************************
7185 This  function  initiates  out-of-core  mode  of  subspace eigensolver. It
7186 should be used in conjunction with other out-of-core-related functions  of
this subpackage in a loop like below:
7188 
7189 > alglib.eigsubspaceoocstart(state)
7190 > while alglib.eigsubspaceooccontinue(state) do
7191 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7192 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7193 >     [calculate  Y=A*X, with X=R^NxM]
7194 >     alglib.eigsubspaceoocsendresult(state, in Y)
7195 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7196 
7197 INPUT PARAMETERS:
7198     State       -   solver object
7199     MType       -   matrix type:
7200                     * 0 for real  symmetric  matrix  (solver  assumes that
7201                       matrix  being   processed  is  symmetric;  symmetric
7202                       direct eigensolver is used for  smaller  subproblems
7203                       arising during solution of larger "full" task)
7204                     Future versions of ALGLIB may  introduce  support  for
7205                     other  matrix   types;   for   now,   only   symmetric
7206                     eigenproblems are supported.
7207 
7208 
7209   -- ALGLIB --
7210      Copyright 16.01.2017 by Bochkanov Sergey
7211 *************************************************************************/
void eigsubspaceoocstart(const eigsubspacestate &state, const ae_int_t mtype, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core (switches solver to out-of-core mode).
    alglib_impl::eigsubspaceoocstart(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), mtype, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7233 
7234 /*************************************************************************
7235 This function performs subspace iteration  in  the  out-of-core  mode.  It
7236 should be used in conjunction with other out-of-core-related functions  of
this subpackage in a loop like below:
7238 
7239 > alglib.eigsubspaceoocstart(state)
7240 > while alglib.eigsubspaceooccontinue(state) do
7241 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7242 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7243 >     [calculate  Y=A*X, with X=R^NxM]
7244 >     alglib.eigsubspaceoocsendresult(state, in Y)
7245 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7246 
7247 
7248   -- ALGLIB --
7249      Copyright 16.01.2017 by Bochkanov Sergey
7250 *************************************************************************/
eigsubspaceooccontinue(const eigsubspacestate & state,const xparams _xparams)7251 bool eigsubspaceooccontinue(const eigsubspacestate &state, const xparams _xparams)
7252 {
7253     jmp_buf _break_jump;
7254     alglib_impl::ae_state _alglib_env_state;
7255     alglib_impl::ae_state_init(&_alglib_env_state);
7256     if( setjmp(_break_jump) )
7257     {
7258 #if !defined(AE_NO_EXCEPTIONS)
7259         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7260 #else
7261         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7262         return 0;
7263 #endif
7264     }
7265     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7266     if( _xparams.flags!=0x0 )
7267         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7268     ae_bool result = alglib_impl::eigsubspaceooccontinue(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &_alglib_env_state);
7269     alglib_impl::ae_state_clear(&_alglib_env_state);
7270     return *(reinterpret_cast<bool*>(&result));
7271 }
7272 
7273 /*************************************************************************
7274 This function is used to retrieve information  about  out-of-core  request
7275 sent by solver to user code: request type (current version  of  the solver
7276 sends only requests for matrix-matrix products) and request size (size  of
7277 the matrices being multiplied).
7278 
7279 This function returns just request metrics; in order  to  get contents  of
7280 the matrices being multiplied, use eigsubspaceoocgetrequestdata().
7281 
7282 It should be used in conjunction with other out-of-core-related  functions
of this subpackage in a loop like below:
7284 
7285 > alglib.eigsubspaceoocstart(state)
7286 > while alglib.eigsubspaceooccontinue(state) do
7287 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7288 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7289 >     [calculate  Y=A*X, with X=R^NxM]
7290 >     alglib.eigsubspaceoocsendresult(state, in Y)
7291 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7292 
7293 INPUT PARAMETERS:
7294     State           -   solver running in out-of-core mode
7295 
7296 OUTPUT PARAMETERS:
7297     RequestType     -   type of the request to process:
7298                         * 0 - for matrix-matrix product A*X, with A  being
7299                           NxN matrix whose eigenvalues/vectors are needed,
7300                           and X being NxREQUESTSIZE one which is  returned
7301                           by the eigsubspaceoocgetrequestdata().
7302     RequestSize     -   size of the X matrix (number of columns),  usually
7303                         it is several times larger than number of  vectors
7304                         K requested by user.
7305 
7306 
7307   -- ALGLIB --
7308      Copyright 16.01.2017 by Bochkanov Sergey
7309 *************************************************************************/
void eigsubspaceoocgetrequestinfo(const eigsubspacestate &state, ae_int_t &requesttype, ae_int_t &requestsize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core; requesttype/requestsize are outputs.
    alglib_impl::eigsubspaceoocgetrequestinfo(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), &requesttype, &requestsize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7331 
7332 /*************************************************************************
7333 This function is used to retrieve information  about  out-of-core  request
7334 sent by solver to user code: matrix X (array[N,RequestSize) which have  to
7335 be multiplied by out-of-core matrix A in a product A*X.
7336 
7337 This function returns just request data; in order to get size of  the data
prior to processing the request, use eigsubspaceoocgetrequestinfo().
7339 
7340 It should be used in conjunction with other out-of-core-related  functions
of this subpackage in a loop like below:
7342 
7343 > alglib.eigsubspaceoocstart(state)
7344 > while alglib.eigsubspaceooccontinue(state) do
7345 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7346 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7347 >     [calculate  Y=A*X, with X=R^NxM]
7348 >     alglib.eigsubspaceoocsendresult(state, in Y)
7349 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7350 
7351 INPUT PARAMETERS:
7352     State           -   solver running in out-of-core mode
7353     X               -   possibly  preallocated   storage;  reallocated  if
7354                         needed, left unchanged, if large enough  to  store
7355                         request data.
7356 
7357 OUTPUT PARAMETERS:
7358     X               -   array[N,RequestSize] or larger, leading  rectangle
7359                         is filled with dense matrix X.
7360 
7361 
7362   -- ALGLIB --
7363      Copyright 16.01.2017 by Bochkanov Sergey
7364 *************************************************************************/
void eigsubspaceoocgetrequestdata(const eigsubspacestate &state, real_2d_array &x, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    // Error trap: failures inside the C core longjmp() back to this point.
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // Delegate to the computational core; x is an output matrix (reallocated
    // by the core if too small).
    alglib_impl::eigsubspaceoocgetrequestdata(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7386 
7387 /*************************************************************************
7388 This function is used to send user reply to out-of-core  request  sent  by
7389 solver. Usually it is product A*X for returned by solver matrix X.
7390 
7391 It should be used in conjunction with other out-of-core-related  functions
7392 of this subpackage in a loop like below:
7393 
7394 > alglib.eigsubspaceoocstart(state)
7395 > while alglib.eigsubspaceooccontinue(state) do
7396 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7397 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7398 >     [calculate  Y=A*X, with X=R^NxM]
7399 >     alglib.eigsubspaceoocsendresult(state, in Y)
7400 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7401 
7402 INPUT PARAMETERS:
7403     State           -   solver running in out-of-core mode
7404     AX              -   array[N,RequestSize] or larger, leading  rectangle
7405                         is filled with product A*X.
7406 
7407 
7408   -- ALGLIB --
7409      Copyright 16.01.2017 by Bochkanov Sergey
7410 *************************************************************************/
eigsubspaceoocsendresult(const eigsubspacestate & state,const real_2d_array & ax,const xparams _xparams)7411 void eigsubspaceoocsendresult(const eigsubspacestate &state, const real_2d_array &ax, const xparams _xparams)
7412 {
7413     jmp_buf _break_jump;
7414     alglib_impl::ae_state _alglib_env_state;
7415     alglib_impl::ae_state_init(&_alglib_env_state);
7416     if( setjmp(_break_jump) )
7417     {
7418 #if !defined(AE_NO_EXCEPTIONS)
7419         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7420 #else
7421         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7422         return;
7423 #endif
7424     }
7425     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7426     if( _xparams.flags!=0x0 )
7427         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7428     alglib_impl::eigsubspaceoocsendresult(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(ax.c_ptr()), &_alglib_env_state);
7429     alglib_impl::ae_state_clear(&_alglib_env_state);
7430     return;
7431 }
7432 
7433 /*************************************************************************
7434 This  function  finalizes out-of-core  mode  of  subspace eigensolver.  It
7435 should be used in conjunction with other out-of-core-related functions  of
7436 this subpackage in a loop like below:
7437 
7438 > alglib.eigsubspaceoocstart(state)
7439 > while alglib.eigsubspaceooccontinue(state) do
7440 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
7441 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
7442 >     [calculate  Y=A*X, with X=R^NxM]
7443 >     alglib.eigsubspaceoocsendresult(state, in Y)
7444 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
7445 
7446 INPUT PARAMETERS:
7447     State       -   solver state
7448 
7449 OUTPUT PARAMETERS:
7450     W           -   array[K], depending on solver settings:
7451                     * top  K  eigenvalues ordered  by  descending   -   if
7452                       eigenvectors are returned in Z
7453                     * zeros - if invariant subspace is returned in Z
7454     Z           -   array[N,K], depending on solver settings either:
7455                     * matrix of eigenvectors found
7456                     * orthogonal basis of K-dimensional invariant subspace
7457     Rep         -   report with additional parameters
7458 
7459   -- ALGLIB --
7460      Copyright 16.01.2017 by Bochkanov Sergey
7461 *************************************************************************/
void eigsubspaceoocstop(const eigsubspacestate &state, real_1d_array &w, real_2d_array &z, eigsubspacereport &rep, const xparams _xparams)
{
    // Thin C++ shim over alglib_impl::eigsubspaceoocstop(), which finalizes
    // the out-of-core eigensolver and fills W, Z and Rep.  The core reports
    // errors via longjmp(), so a setjmp() recovery point is installed first.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Reached only when the core longjmp()s back after an error.
#if !defined(AE_NO_EXCEPTIONS)
        // Translate the core's error message into a C++ exception.
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        // Exception-free build: record the error flag and return.
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    // Register the recovery point and forward optional caller flags.
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: the C core takes non-const pointers even for read-only args.
    alglib_impl::eigsubspaceoocstop(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), const_cast<alglib_impl::eigsubspacereport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7483 
7484 /*************************************************************************
7485 This  function runs subspace eigensolver for dense NxN symmetric matrix A,
7486 given by its upper or lower triangle.
7487 
7488 This function can not process nonsymmetric matrices.
7489 
7490 INPUT PARAMETERS:
7491     State       -   solver state
7492     A           -   array[N,N], symmetric NxN matrix given by one  of  its
7493                     triangles
7494     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
7495                     other one is not referenced at all).
7496 
7497 OUTPUT PARAMETERS:
7498     W           -   array[K], top  K  eigenvalues ordered  by   descending
7499                     of their absolute values
7500     Z           -   array[N,K], matrix of eigenvectors found
7501     Rep         -   report with additional parameters
7502 
7503 NOTE: internally this function allocates a copy of NxN dense A. You should
7504       take it into account when working with very large matrices occupying
7505       almost all RAM.
7506 
7507   ! FREE EDITION OF ALGLIB:
7508   !
7509   ! Free Edition of ALGLIB supports following important features for  this
7510   ! function:
7511   ! * C++ version: x64 SIMD support using C++ intrinsics
7512   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
7513   !
7514   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
7515   ! Reference Manual in order  to  find  out  how to activate SIMD support
7516   ! in ALGLIB.
7517 
7518   ! COMMERCIAL EDITION OF ALGLIB:
7519   !
7520   ! Commercial Edition of ALGLIB includes following important improvements
7521   ! of this function:
7522   ! * high-performance native backend with same C# interface (C# version)
7523   ! * multithreading support (C++ and C# versions)
7524   ! * hardware vendor (Intel) implementations of linear algebra primitives
7525   !   (C++ and C# versions, x86/x64 platform)
7526   !
7527   ! We recommend you to read 'Working with commercial version' section  of
7528   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7529   ! related features provided by commercial edition of ALGLIB.
7530 
7531   -- ALGLIB --
7532      Copyright 16.01.2017 by Bochkanov Sergey
7533 *************************************************************************/
void eigsubspacesolvedenses(const eigsubspacestate &state, const real_2d_array &a, const bool isupper, real_1d_array &w, real_2d_array &z, eigsubspacereport &rep, const xparams _xparams)
{
    // Thin C++ shim over alglib_impl::eigsubspacesolvedenses(): runs the
    // subspace eigensolver on a dense symmetric matrix given by one triangle.
    // Errors propagate from the C core via longjmp() to the setjmp() below.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Reached only on error signalled by the computational core.
#if !defined(AE_NO_EXCEPTIONS)
        // Rethrow the core's error message as a C++ exception.
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        // Exception-free build: record the error flag and return.
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    // Register the recovery point and forward optional caller flags.
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    // const_cast: the C core takes non-const pointers even for read-only args.
    alglib_impl::eigsubspacesolvedenses(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), const_cast<alglib_impl::eigsubspacereport*>(rep.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
7555 
7556 /*************************************************************************
7557 This  function runs eigensolver for dense NxN symmetric matrix A, given by
7558 upper or lower triangle.
7559 
7560 This function can not process nonsymmetric matrices.
7561 
7562 INPUT PARAMETERS:
7563     State       -   solver state
7564     A           -   NxN symmetric matrix given by one of its triangles
7565     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
7566                     other one is not referenced at all).
7567 
7568 OUTPUT PARAMETERS:
7569     W           -   array[K], top  K  eigenvalues ordered  by   descending
7570                     of their absolute values
7571     Z           -   array[N,K], matrix of eigenvectors found
7572     Rep         -   report with additional parameters
7573 
7574   -- ALGLIB --
7575      Copyright 16.01.2017 by Bochkanov Sergey
7576 *************************************************************************/
eigsubspacesolvesparses(const eigsubspacestate & state,const sparsematrix & a,const bool isupper,real_1d_array & w,real_2d_array & z,eigsubspacereport & rep,const xparams _xparams)7577 void eigsubspacesolvesparses(const eigsubspacestate &state, const sparsematrix &a, const bool isupper, real_1d_array &w, real_2d_array &z, eigsubspacereport &rep, const xparams _xparams)
7578 {
7579     jmp_buf _break_jump;
7580     alglib_impl::ae_state _alglib_env_state;
7581     alglib_impl::ae_state_init(&_alglib_env_state);
7582     if( setjmp(_break_jump) )
7583     {
7584 #if !defined(AE_NO_EXCEPTIONS)
7585         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7586 #else
7587         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7588         return;
7589 #endif
7590     }
7591     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7592     if( _xparams.flags!=0x0 )
7593         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7594     alglib_impl::eigsubspacesolvesparses(const_cast<alglib_impl::eigsubspacestate*>(state.c_ptr()), const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), const_cast<alglib_impl::eigsubspacereport*>(rep.c_ptr()), &_alglib_env_state);
7595     alglib_impl::ae_state_clear(&_alglib_env_state);
7596     return;
7597 }
7598 
7599 /*************************************************************************
7600 Finding the eigenvalues and eigenvectors of a symmetric matrix
7601 
7602 The algorithm finds eigen pairs of a symmetric matrix by reducing it to
7603 tridiagonal form and using the QL/QR algorithm.
7604 
7605   ! COMMERCIAL EDITION OF ALGLIB:
7606   !
7607   ! Commercial Edition of ALGLIB includes following important improvements
7608   ! of this function:
7609   ! * high-performance native backend with same C# interface (C# version)
7610   ! * hardware vendor (Intel) implementations of linear algebra primitives
7611   !   (C++ and C# versions, x86/x64 platform)
7612   !
7613   ! We recommend you to read 'Working with commercial version' section  of
7614   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7615   ! related features provided by commercial edition of ALGLIB.
7616 
7617 Input parameters:
7618     A       -   symmetric matrix which is given by its upper or lower
7619                 triangular part.
7620                 Array whose indexes range within [0..N-1, 0..N-1].
7621     N       -   size of matrix A.
7622     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
7623                 If ZNeeded is equal to:
7624                  * 0, the eigenvectors are not returned;
7625                  * 1, the eigenvectors are returned.
7626     IsUpper -   storage format.
7627 
7628 Output parameters:
7629     D       -   eigenvalues in ascending order.
7630                 Array whose index ranges within [0..N-1].
7631     Z       -   if ZNeeded is equal to:
7632                  * 0, Z hasn't changed;
7633                  * 1, Z contains the eigenvectors.
7634                 Array whose indexes range within [0..N-1, 0..N-1].
7635                 The eigenvectors are stored in the matrix columns.
7636 
7637 Result:
7638     True, if the algorithm has converged.
7639     False, if the algorithm hasn't converged (rare case).
7640 
7641   -- ALGLIB --
7642      Copyright 2005-2008 by Bochkanov Sergey
7643 *************************************************************************/
smatrixevd(const real_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,real_1d_array & d,real_2d_array & z,const xparams _xparams)7644 bool smatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, real_2d_array &z, const xparams _xparams)
7645 {
7646     jmp_buf _break_jump;
7647     alglib_impl::ae_state _alglib_env_state;
7648     alglib_impl::ae_state_init(&_alglib_env_state);
7649     if( setjmp(_break_jump) )
7650     {
7651 #if !defined(AE_NO_EXCEPTIONS)
7652         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7653 #else
7654         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7655         return 0;
7656 #endif
7657     }
7658     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7659     if( _xparams.flags!=0x0 )
7660         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7661     ae_bool result = alglib_impl::smatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7662     alglib_impl::ae_state_clear(&_alglib_env_state);
7663     return *(reinterpret_cast<bool*>(&result));
7664 }
7665 
7666 /*************************************************************************
7667 Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
7668 matrix  in  a  given half open interval (A, B] by using  a  bisection  and
7669 inverse iteration
7670 
7671   ! COMMERCIAL EDITION OF ALGLIB:
7672   !
7673   ! Commercial Edition of ALGLIB includes following important improvements
7674   ! of this function:
7675   ! * high-performance native backend with same C# interface (C# version)
7676   ! * hardware vendor (Intel) implementations of linear algebra primitives
7677   !   (C++ and C# versions, x86/x64 platform)
7678   !
7679   ! We recommend you to read 'Working with commercial version' section  of
7680   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7681   ! related features provided by commercial edition of ALGLIB.
7682 
7683 Input parameters:
7684     A       -   symmetric matrix which is given by its upper or lower
7685                 triangular part. Array [0..N-1, 0..N-1].
7686     N       -   size of matrix A.
7687     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
7688                 If ZNeeded is equal to:
7689                  * 0, the eigenvectors are not returned;
7690                  * 1, the eigenvectors are returned.
7691     IsUpperA -  storage format of matrix A.
7692     B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
7693 
7694 Output parameters:
7695     M       -   number of eigenvalues found in a given half-interval (M>=0).
7696     W       -   array of the eigenvalues found.
7697                 Array whose index ranges within [0..M-1].
7698     Z       -   if ZNeeded is equal to:
7699                  * 0, Z hasn't changed;
7700                  * 1, Z contains eigenvectors.
7701                 Array whose indexes range within [0..N-1, 0..M-1].
7702                 The eigenvectors are stored in the matrix columns.
7703 
7704 Result:
7705     True, if successful. M contains the number of eigenvalues in the given
7706     half-interval (could be equal to 0), W contains the eigenvalues,
7707     Z contains the eigenvectors (if needed).
7708 
7709     False, if the bisection method subroutine wasn't able to find the
7710     eigenvalues in the given interval or if the inverse iteration subroutine
7711     wasn't able to find all the corresponding eigenvectors.
7712     In that case, the eigenvalues and eigenvectors are not returned,
7713     M is equal to 0.
7714 
7715   -- ALGLIB --
7716      Copyright 07.01.2006 by Bochkanov Sergey
7717 *************************************************************************/
smatrixevdr(const real_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,const double b1,const double b2,ae_int_t & m,real_1d_array & w,real_2d_array & z,const xparams _xparams)7718 bool smatrixevdr(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, real_2d_array &z, const xparams _xparams)
7719 {
7720     jmp_buf _break_jump;
7721     alglib_impl::ae_state _alglib_env_state;
7722     alglib_impl::ae_state_init(&_alglib_env_state);
7723     if( setjmp(_break_jump) )
7724     {
7725 #if !defined(AE_NO_EXCEPTIONS)
7726         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7727 #else
7728         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7729         return 0;
7730 #endif
7731     }
7732     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7733     if( _xparams.flags!=0x0 )
7734         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7735     ae_bool result = alglib_impl::smatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7736     alglib_impl::ae_state_clear(&_alglib_env_state);
7737     return *(reinterpret_cast<bool*>(&result));
7738 }
7739 
7740 /*************************************************************************
7741 Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
7742 matrix with given indexes by using bisection and inverse iteration methods.
7743 
7744 Input parameters:
7745     A       -   symmetric matrix which is given by its upper or lower
7746                 triangular part. Array whose indexes range within [0..N-1, 0..N-1].
7747     N       -   size of matrix A.
7748     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
7749                 If ZNeeded is equal to:
7750                  * 0, the eigenvectors are not returned;
7751                  * 1, the eigenvectors are returned.
7752     IsUpperA -  storage format of matrix A.
7753     I1, I2 -    index interval for searching (from I1 to I2).
7754                 0 <= I1 <= I2 <= N-1.
7755 
7756 Output parameters:
7757     W       -   array of the eigenvalues found.
7758                 Array whose index ranges within [0..I2-I1].
7759     Z       -   if ZNeeded is equal to:
7760                  * 0, Z hasn't changed;
7761                  * 1, Z contains eigenvectors.
7762                 Array whose indexes range within [0..N-1, 0..I2-I1].
7763                 In that case, the eigenvectors are stored in the matrix columns.
7764 
7765 Result:
7766     True, if successful. W contains the eigenvalues, Z contains the
7767     eigenvectors (if needed).
7768 
7769     False, if the bisection method subroutine wasn't able to find the
7770     eigenvalues in the given interval or if the inverse iteration subroutine
7771     wasn't able to find all the corresponding eigenvectors.
7772     In that case, the eigenvalues and eigenvectors are not returned.
7773 
7774   -- ALGLIB --
7775      Copyright 07.01.2006 by Bochkanov Sergey
7776 *************************************************************************/
smatrixevdi(const real_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,const ae_int_t i1,const ae_int_t i2,real_1d_array & w,real_2d_array & z,const xparams _xparams)7777 bool smatrixevdi(const real_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, real_2d_array &z, const xparams _xparams)
7778 {
7779     jmp_buf _break_jump;
7780     alglib_impl::ae_state _alglib_env_state;
7781     alglib_impl::ae_state_init(&_alglib_env_state);
7782     if( setjmp(_break_jump) )
7783     {
7784 #if !defined(AE_NO_EXCEPTIONS)
7785         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7786 #else
7787         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7788         return 0;
7789 #endif
7790     }
7791     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7792     if( _xparams.flags!=0x0 )
7793         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7794     ae_bool result = alglib_impl::smatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7795     alglib_impl::ae_state_clear(&_alglib_env_state);
7796     return *(reinterpret_cast<bool*>(&result));
7797 }
7798 
7799 /*************************************************************************
7800 Finding the eigenvalues and eigenvectors of a Hermitian matrix
7801 
7802 The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
7803 real tridiagonal form and using the QL/QR algorithm.
7804 
7805   ! COMMERCIAL EDITION OF ALGLIB:
7806   !
7807   ! Commercial Edition of ALGLIB includes following important improvements
7808   ! of this function:
7809   ! * high-performance native backend with same C# interface (C# version)
7810   ! * hardware vendor (Intel) implementations of linear algebra primitives
7811   !   (C++ and C# versions, x86/x64 platform)
7812   !
7813   ! We recommend you to read 'Working with commercial version' section  of
7814   ! ALGLIB Reference Manual in order to find out how to  use  performance-
7815   ! related features provided by commercial edition of ALGLIB.
7816 
7817 Input parameters:
7818     A       -   Hermitian matrix which is given  by  its  upper  or  lower
7819                 triangular part.
7820                 Array whose indexes range within [0..N-1, 0..N-1].
7821     N       -   size of matrix A.
7822     IsUpper -   storage format.
7823     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
7824                 not. If ZNeeded is equal to:
7825                  * 0, the eigenvectors are not returned;
7826                  * 1, the eigenvectors are returned.
7827 
7828 Output parameters:
7829     D       -   eigenvalues in ascending order.
7830                 Array whose index ranges within [0..N-1].
7831     Z       -   if ZNeeded is equal to:
7832                  * 0, Z hasn't changed;
7833                  * 1, Z contains the eigenvectors.
7834                 Array whose indexes range within [0..N-1, 0..N-1].
7835                 The eigenvectors are stored in the matrix columns.
7836 
7837 Result:
7838     True, if the algorithm has converged.
7839     False, if the algorithm hasn't converged (rare case).
7840 
7841 Note:
7842     eigenvectors of Hermitian matrix are defined up to  multiplication  by
7843     a complex number L, such that |L|=1.
7844 
7845   -- ALGLIB --
7846      Copyright 2005, 23 March 2007 by Bochkanov Sergey
7847 *************************************************************************/
hmatrixevd(const complex_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,real_1d_array & d,complex_2d_array & z,const xparams _xparams)7848 bool hmatrixevd(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, real_1d_array &d, complex_2d_array &z, const xparams _xparams)
7849 {
7850     jmp_buf _break_jump;
7851     alglib_impl::ae_state _alglib_env_state;
7852     alglib_impl::ae_state_init(&_alglib_env_state);
7853     if( setjmp(_break_jump) )
7854     {
7855 #if !defined(AE_NO_EXCEPTIONS)
7856         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7857 #else
7858         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7859         return 0;
7860 #endif
7861     }
7862     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7863     if( _xparams.flags!=0x0 )
7864         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7865     ae_bool result = alglib_impl::hmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7866     alglib_impl::ae_state_clear(&_alglib_env_state);
7867     return *(reinterpret_cast<bool*>(&result));
7868 }
7869 
7870 /*************************************************************************
7871 Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
7872 matrix  in  a  given half-interval (A, B] by using a bisection and inverse
7873 iteration
7874 
7875 Input parameters:
7876     A       -   Hermitian matrix which is given  by  its  upper  or  lower
7877                 triangular  part.  Array  whose   indexes   range   within
7878                 [0..N-1, 0..N-1].
7879     N       -   size of matrix A.
7880     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
7881                 not. If ZNeeded is equal to:
7882                  * 0, the eigenvectors are not returned;
7883                  * 1, the eigenvectors are returned.
7884     IsUpperA -  storage format of matrix A.
7885     B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
7886 
7887 Output parameters:
7888     M       -   number of eigenvalues found in a given half-interval, M>=0
7889     W       -   array of the eigenvalues found.
7890                 Array whose index ranges within [0..M-1].
7891     Z       -   if ZNeeded is equal to:
7892                  * 0, Z hasn't changed;
7893                  * 1, Z contains eigenvectors.
7894                 Array whose indexes range within [0..N-1, 0..M-1].
7895                 The eigenvectors are stored in the matrix columns.
7896 
7897 Result:
7898     True, if successful. M contains the number of eigenvalues in the given
7899     half-interval (could be equal to 0), W contains the eigenvalues,
7900     Z contains the eigenvectors (if needed).
7901 
7902     False, if the bisection method subroutine  wasn't  able  to  find  the
7903     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
7904     subroutine  wasn't  able  to  find all the corresponding eigenvectors.
7905     In that case, the eigenvalues and eigenvectors are not returned, M  is
7906     equal to 0.
7907 
7908 Note:
7909     eigen vectors of Hermitian matrix are defined up to multiplication  by
7910     a complex number L, such that |L|=1.
7911 
7912   -- ALGLIB --
7913      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
7914 *************************************************************************/
hmatrixevdr(const complex_2d_array & a,const ae_int_t n,const ae_int_t zneeded,const bool isupper,const double b1,const double b2,ae_int_t & m,real_1d_array & w,complex_2d_array & z,const xparams _xparams)7915 bool hmatrixevdr(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const double b1, const double b2, ae_int_t &m, real_1d_array &w, complex_2d_array &z, const xparams _xparams)
7916 {
7917     jmp_buf _break_jump;
7918     alglib_impl::ae_state _alglib_env_state;
7919     alglib_impl::ae_state_init(&_alglib_env_state);
7920     if( setjmp(_break_jump) )
7921     {
7922 #if !defined(AE_NO_EXCEPTIONS)
7923         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7924 #else
7925         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7926         return 0;
7927 #endif
7928     }
7929     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7930     if( _xparams.flags!=0x0 )
7931         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7932     ae_bool result = alglib_impl::hmatrixevdr(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, b1, b2, &m, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
7933     alglib_impl::ae_state_clear(&_alglib_env_state);
7934     return *(reinterpret_cast<bool*>(&result));
7935 }
7936 
7937 /*************************************************************************
7938 Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
7939 matrix with given indexes by using bisection and inverse iteration methods
7940 
7941 Input parameters:
7942     A       -   Hermitian matrix which is given  by  its  upper  or  lower
7943                 triangular part.
7944                 Array whose indexes range within [0..N-1, 0..N-1].
7945     N       -   size of matrix A.
7946     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
7947                 not. If ZNeeded is equal to:
7948                  * 0, the eigenvectors are not returned;
7949                  * 1, the eigenvectors are returned.
7950     IsUpperA -  storage format of matrix A.
7951     I1, I2 -    index interval for searching (from I1 to I2).
7952                 0 <= I1 <= I2 <= N-1.
7953 
7954 Output parameters:
7955     W       -   array of the eigenvalues found.
7956                 Array whose index ranges within [0..I2-I1].
7957     Z       -   if ZNeeded is equal to:
7958                  * 0, Z hasn't changed;
7959                  * 1, Z contains eigenvectors.
7960                 Array whose indexes range within [0..N-1, 0..I2-I1].
7961                 In  that  case,  the eigenvectors are stored in the matrix
7962                 columns.
7963 
7964 Result:
7965     True, if successful. W contains the eigenvalues, Z contains the
7966     eigenvectors (if needed).
7967 
7968     False, if the bisection method subroutine  wasn't  able  to  find  the
7969     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
7970     subroutine wasn't able to find  all  the  corresponding  eigenvectors.
7971     In that case, the eigenvalues and eigenvectors are not returned.
7972 
7973 Note:
7974     eigen vectors of Hermitian matrix are defined up to multiplication  by
7975     a complex number L, such that |L|=1.
7976 
7977   -- ALGLIB --
7978      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
7979 *************************************************************************/
bool hmatrixevdi(const complex_2d_array &a, const ae_int_t n, const ae_int_t zneeded, const bool isupper, const ae_int_t i1, const ae_int_t i2, real_1d_array &w, complex_2d_array &z, const xparams _xparams)
{
    // C++ interface shim: initializes an ALGLIB environment state, forwards
    // to the C-core alglib_impl::hmatrixevdi(), and converts core errors
    // (delivered via longjmp into _break_jump) into a C++ exception or,
    // in no-exception builds, a global error flag.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path: the core jumped back here through _break_jump.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    // Register the jump buffer so core errors unwind to the setjmp above.
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::hmatrixevdi(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, zneeded, isupper, i1, i2, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8001 
8002 /*************************************************************************
8003 Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
8004 
The algorithm finds the eigenpairs of a tridiagonal symmetric matrix by
using a QL/QR algorithm with implicit shifts.
8007 
8008   ! COMMERCIAL EDITION OF ALGLIB:
8009   !
8010   ! Commercial Edition of ALGLIB includes following important improvements
8011   ! of this function:
8012   ! * high-performance native backend with same C# interface (C# version)
8013   ! * hardware vendor (Intel) implementations of linear algebra primitives
8014   !   (C++ and C# versions, x86/x64 platform)
8015   !
8016   ! We recommend you to read 'Working with commercial version' section  of
8017   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8018   ! related features provided by commercial edition of ALGLIB.
8019 
8020 Input parameters:
8021     D       -   the main diagonal of a tridiagonal matrix.
8022                 Array whose index ranges within [0..N-1].
8023     E       -   the secondary diagonal of a tridiagonal matrix.
8024                 Array whose index ranges within [0..N-2].
8025     N       -   size of matrix A.
8026     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
8027                 If ZNeeded is equal to:
8028                  * 0, the eigenvectors are not needed;
8029                  * 1, the eigenvectors of a tridiagonal matrix
8030                    are multiplied by the square matrix Z. It is used if the
8031                    tridiagonal matrix is obtained by the similarity
8032                    transformation of a symmetric matrix;
8033                  * 2, the eigenvectors of a tridiagonal matrix replace the
8034                    square matrix Z;
8035                  * 3, matrix Z contains the first row of the eigenvectors
8036                    matrix.
8037     Z       -   if ZNeeded=1, Z contains the square matrix by which the
8038                 eigenvectors are multiplied.
8039                 Array whose indexes range within [0..N-1, 0..N-1].
8040 
8041 Output parameters:
8042     D       -   eigenvalues in ascending order.
8043                 Array whose index ranges within [0..N-1].
8044     Z       -   if ZNeeded is equal to:
8045                  * 0, Z hasn't changed;
8046                  * 1, Z contains the product of a given matrix (from the left)
8047                    and the eigenvectors matrix (from the right);
8048                  * 2, Z contains the eigenvectors.
8049                  * 3, Z contains the first row of the eigenvectors matrix.
8050                 If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
8051                 In that case, the eigenvectors are stored in the matrix columns.
8052                 If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
8053 
8054 Result:
8055     True, if the algorithm has converged.
8056     False, if the algorithm hasn't converged.
8057 
8058   -- LAPACK routine (version 3.0) --
8059      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8060      Courant Institute, Argonne National Lab, and Rice University
8061      September 30, 1994
8062 *************************************************************************/
bool smatrixtdevd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, real_2d_array &z, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::smatrixtdevd(); errors raised by
    // the core longjmp back into this frame (see setjmp below) and are
    // rethrown as C++ exceptions or recorded in the global error flag.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::smatrixtdevd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8084 
8085 /*************************************************************************
8086 Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
8087 given half-interval (A, B] by using bisection and inverse iteration.
8088 
8089 Input parameters:
8090     D       -   the main diagonal of a tridiagonal matrix.
8091                 Array whose index ranges within [0..N-1].
8092     E       -   the secondary diagonal of a tridiagonal matrix.
8093                 Array whose index ranges within [0..N-2].
8094     N       -   size of matrix, N>=0.
8095     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
8096                 If ZNeeded is equal to:
8097                  * 0, the eigenvectors are not needed;
8098                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
8099                    by the square matrix Z. It is used if the tridiagonal
8100                    matrix is obtained by the similarity transformation
8101                    of a symmetric matrix.
8102                  * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
8103     A, B    -   half-interval (A, B] to search eigenvalues in.
8104     Z       -   if ZNeeded is equal to:
8105                  * 0, Z isn't used and remains unchanged;
8106                  * 1, Z contains the square matrix (array whose indexes range
8107                    within [0..N-1, 0..N-1]) which reduces the given symmetric
8108                    matrix to tridiagonal form;
8109                  * 2, Z isn't used (but changed on the exit).
8110 
8111 Output parameters:
8112     D       -   array of the eigenvalues found.
8113                 Array whose index ranges within [0..M-1].
8114     M       -   number of eigenvalues found in the given half-interval (M>=0).
8115     Z       -   if ZNeeded is equal to:
8116                  * 0, doesn't contain any information;
8117                  * 1, contains the product of a given NxN matrix Z (from the
8118                    left) and NxM matrix of the eigenvectors found (from the
8119                    right). Array whose indexes range within [0..N-1, 0..M-1].
8120                  * 2, contains the matrix of the eigenvectors found.
8121                    Array whose indexes range within [0..N-1, 0..M-1].
8122 
8123 Result:
8124 
8125     True, if successful. In that case, M contains the number of eigenvalues
8126     in the given half-interval (could be equal to 0), D contains the eigenvalues,
8127     Z contains the eigenvectors (if needed).
8128     It should be noted that the subroutine changes the size of arrays D and Z.
8129 
8130     False, if the bisection method subroutine wasn't able to find the
8131     eigenvalues in the given interval or if the inverse iteration subroutine
8132     wasn't able to find all the corresponding eigenvectors. In that case,
8133     the eigenvalues and eigenvectors are not returned, M is equal to 0.
8134 
8135   -- ALGLIB --
8136      Copyright 31.03.2008 by Bochkanov Sergey
8137 *************************************************************************/
bool smatrixtdevdr(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const double a, const double b, ae_int_t &m, real_2d_array &z, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::smatrixtdevdr(); M is written
    // through &m by the core. Error handling follows the standard ALGLIB
    // setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::smatrixtdevdr(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, a, b, &m, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8159 
8160 /*************************************************************************
8161 Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
indexes (in ascending order) by using the bisection and inverse iteration.
8163 
8164 Input parameters:
8165     D       -   the main diagonal of a tridiagonal matrix.
8166                 Array whose index ranges within [0..N-1].
8167     E       -   the secondary diagonal of a tridiagonal matrix.
8168                 Array whose index ranges within [0..N-2].
8169     N       -   size of matrix. N>=0.
8170     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
8171                 If ZNeeded is equal to:
8172                  * 0, the eigenvectors are not needed;
8173                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
8174                    by the square matrix Z. It is used if the
8175                    tridiagonal matrix is obtained by the similarity transformation
8176                    of a symmetric matrix.
8177                  * 2, the eigenvectors of a tridiagonal matrix replace
8178                    matrix Z.
8179     I1, I2  -   index interval for searching (from I1 to I2).
8180                 0 <= I1 <= I2 <= N-1.
8181     Z       -   if ZNeeded is equal to:
8182                  * 0, Z isn't used and remains unchanged;
8183                  * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
8184                    which reduces the given symmetric matrix to  tridiagonal form;
8185                  * 2, Z isn't used (but changed on the exit).
8186 
8187 Output parameters:
8188     D       -   array of the eigenvalues found.
8189                 Array whose index ranges within [0..I2-I1].
8190     Z       -   if ZNeeded is equal to:
8191                  * 0, doesn't contain any information;
8192                  * 1, contains the product of a given NxN matrix Z (from the left) and
8193                    Nx(I2-I1) matrix of the eigenvectors found (from the right).
8194                    Array whose indexes range within [0..N-1, 0..I2-I1].
                 * 2, contains the matrix of the eigenvectors found.
8196                    Array whose indexes range within [0..N-1, 0..I2-I1].
8197 
8198 
8199 Result:
8200 
8201     True, if successful. In that case, D contains the eigenvalues,
8202     Z contains the eigenvectors (if needed).
8203     It should be noted that the subroutine changes the size of arrays D and Z.
8204 
8205     False, if the bisection method subroutine wasn't able to find the eigenvalues
8206     in the given interval or if the inverse iteration subroutine wasn't able
8207     to find all the corresponding eigenvectors. In that case, the eigenvalues
8208     and eigenvectors are not returned.
8209 
8210   -- ALGLIB --
8211      Copyright 25.12.2005 by Bochkanov Sergey
8212 *************************************************************************/
bool smatrixtdevdi(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const ae_int_t zneeded, const ae_int_t i1, const ae_int_t i2, real_2d_array &z, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::smatrixtdevdi(). Error handling
    // follows the standard ALGLIB setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::smatrixtdevdi(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, zneeded, i1, i2, const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8234 
8235 /*************************************************************************
8236 Finding eigenvalues and eigenvectors of a general (unsymmetric) matrix
8237 
8238   ! COMMERCIAL EDITION OF ALGLIB:
8239   !
8240   ! Commercial Edition of ALGLIB includes following important improvements
8241   ! of this function:
8242   ! * high-performance native backend with same C# interface (C# version)
8243   ! * hardware vendor (Intel) implementations of linear algebra primitives
8244   !   (C++ and C# versions, x86/x64 platform)
8245   !
8246   ! We recommend you to read 'Working with commercial version' section  of
8247   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8248   ! related features provided by commercial edition of ALGLIB.
8249 
8250 The algorithm finds eigenvalues and eigenvectors of a general matrix by
8251 using the QR algorithm with multiple shifts. The algorithm can find
8252 eigenvalues and both left and right eigenvectors.
8253 
8254 The right eigenvector is a vector x such that A*x = w*x, and the left
8255 eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
8256 conjugate transposition of vector y).
8257 
8258 Input parameters:
8259     A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
8260     N       -   size of matrix A.
8261     VNeeded -   flag controlling whether eigenvectors are needed or not.
8262                 If VNeeded is equal to:
8263                  * 0, eigenvectors are not returned;
8264                  * 1, right eigenvectors are returned;
8265                  * 2, left eigenvectors are returned;
8266                  * 3, both left and right eigenvectors are returned.
8267 
8268 Output parameters:
8269     WR      -   real parts of eigenvalues.
8270                 Array whose index ranges within [0..N-1].
    WI      -   imaginary parts of eigenvalues.
8272                 Array whose index ranges within [0..N-1].
8273     VL, VR  -   arrays of left and right eigenvectors (if they are needed).
8274                 If WI[i]=0, the respective eigenvalue is a real number,
8275                 and it corresponds to the column number I of matrices VL/VR.
8276                 If WI[i]>0, we have a pair of complex conjugate numbers with
8277                 positive and negative imaginary parts:
8278                     the first eigenvalue WR[i] + sqrt(-1)*WI[i];
8279                     the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
8280                     WI[i]>0
8281                     WI[i+1] = -WI[i] < 0
8282                 In that case, the eigenvector  corresponding to the first
8283                 eigenvalue is located in i and i+1 columns of matrices
8284                 VL/VR (the column number i contains the real part, and the
8285                 column number i+1 contains the imaginary part), and the vector
8286                 corresponding to the second eigenvalue is a complex conjugate to
8287                 the first vector.
8288                 Arrays whose indexes range within [0..N-1, 0..N-1].
8289 
8290 Result:
8291     True, if the algorithm has converged.
8292     False, if the algorithm has not converged.
8293 
8294 Note 1:
8295     Some users may ask the following question: what if WI[N-1]>0?
8296     WI[N] must contain an eigenvalue which is complex conjugate to the
8297     N-th eigenvalue, but the array has only size N?
8298     The answer is as follows: such a situation cannot occur because the
8299     algorithm finds a pairs of eigenvalues, therefore, if WI[i]>0, I is
8300     strictly less than N-1.
8301 
8302 Note 2:
8303     The algorithm performance depends on the value of the internal parameter
8304     NS of the InternalSchurDecomposition subroutine which defines the number
8305     of shifts in the QR algorithm (similarly to the block width in block-matrix
8306     algorithms of linear algebra). If you require maximum performance
8307     on your machine, it is recommended to adjust this parameter manually.
8308 
8309 
8310 See also the InternalTREVC subroutine.
8311 
8312 The algorithm is based on the LAPACK 3.0 library.
8313 *************************************************************************/
bool rmatrixevd(const real_2d_array &a, const ae_int_t n, const ae_int_t vneeded, real_1d_array &wr, real_1d_array &wi, real_2d_array &vl, real_2d_array &vr, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::rmatrixevd(); outputs are written
    // into wr/wi/vl/vr through their C-core views. Error handling follows
    // the standard ALGLIB setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return 0;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    ae_bool result = alglib_impl::rmatrixevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, vneeded, const_cast<alglib_impl::ae_vector*>(wr.c_ptr()), const_cast<alglib_impl::ae_vector*>(wi.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vl.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vr.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    // ae_bool -> bool conversion preserving the stored bit pattern.
    return *(reinterpret_cast<bool*>(&result));
}
8335 #endif
8336 
8337 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
8338 
8339 #endif
8340 
8341 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
8342 
8343 #endif
8344 
8345 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
8346 
8347 #endif
8348 
8349 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
8350 
8351 #endif
8352 
8353 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
8354 /*************************************************************************
8355 An analysis of the sparse matrix decomposition, performed prior to  actual
8356 numerical factorization. You should not directly  access  fields  of  this
8357 object - use appropriate ALGLIB functions to work with this object.
8358 *************************************************************************/
_sparsedecompositionanalysis_owner::_sparsedecompositionanalysis_owner()
{
    // Default constructor: allocates, zero-fills and initializes the wrapped
    // C-core sparsedecompositionanalysis structure (p_struct).
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path (core longjmp'ed here): release any partially
        // constructed state before rethrowing / flagging the error.
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    // Allocate and zero-initialize the C-core structure, then run its init.
    p_struct = (alglib_impl::sparsedecompositionanalysis*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsedecompositionanalysis), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsedecompositionanalysis));
    alglib_impl::_sparsedecompositionanalysis_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
8387 
_sparsedecompositionanalysis_owner::_sparsedecompositionanalysis_owner(const _sparsedecompositionanalysis_owner &rhs)
{
    // Copy constructor: deep-copies rhs's C-core structure via init_copy.
    // Requires rhs to be initialized (asserted below).
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path (core longjmp'ed here): release any partially
        // constructed state before rethrowing / flagging the error.
        if( p_struct!=NULL )
        {
            alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsedecompositionanalysis copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::sparsedecompositionanalysis*)alglib_impl::ae_malloc(sizeof(alglib_impl::sparsedecompositionanalysis), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::sparsedecompositionanalysis));
    alglib_impl::_sparsedecompositionanalysis_init_copy(p_struct, const_cast<alglib_impl::sparsedecompositionanalysis*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
8417 
_sparsedecompositionanalysis_owner& _sparsedecompositionanalysis_owner::operator=(const _sparsedecompositionanalysis_owner &rhs)
{
    // Copy assignment: destroys the current C-core contents and deep-copies
    // rhs's via init_copy. Both sides must already be initialized.
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: sparsedecompositionanalysis assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: sparsedecompositionanalysis assignment constructor failure (source is not initialized)", &_state);
    // Tear down the destination contents in place, then rebuild as a copy.
    alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::sparsedecompositionanalysis));
    alglib_impl::_sparsedecompositionanalysis_init_copy(p_struct, const_cast<alglib_impl::sparsedecompositionanalysis*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
8444 
~_sparsedecompositionanalysis_owner()8445 _sparsedecompositionanalysis_owner::~_sparsedecompositionanalysis_owner()
8446 {
8447     if( p_struct!=NULL )
8448     {
8449         alglib_impl::_sparsedecompositionanalysis_destroy(p_struct);
8450         ae_free(p_struct);
8451     }
8452 }
8453 
// Returns a non-owning pointer to the wrapped C-core structure.
alglib_impl::sparsedecompositionanalysis* _sparsedecompositionanalysis_owner::c_ptr()
{
    return p_struct;
}
8458 
c_ptr() const8459 alglib_impl::sparsedecompositionanalysis* _sparsedecompositionanalysis_owner::c_ptr() const
8460 {
8461     return const_cast<alglib_impl::sparsedecompositionanalysis*>(p_struct);
8462 }
// Default constructor: delegates C-core allocation to the owner base class.
sparsedecompositionanalysis::sparsedecompositionanalysis() : _sparsedecompositionanalysis_owner()
{
}
8466 
// Copy constructor: delegates the deep copy to the owner base class.
sparsedecompositionanalysis::sparsedecompositionanalysis(const sparsedecompositionanalysis &rhs):_sparsedecompositionanalysis_owner(rhs)
{
}
8470 
// Copy assignment: forwards to the owner base class, which performs the
// deep copy of the C-core structure; self-assignment is a no-op.
sparsedecompositionanalysis& sparsedecompositionanalysis::operator=(const sparsedecompositionanalysis &rhs)
{
    if( this!=&rhs )
        _sparsedecompositionanalysis_owner::operator=(rhs);
    return *this;
}
8478 
// Destructor: the owner base class releases the C-core structure.
sparsedecompositionanalysis::~sparsedecompositionanalysis()
{
}
8482 
8483 /*************************************************************************
8484 LU decomposition of a general real matrix with row pivoting
8485 
8486 A is represented as A = P*L*U, where:
8487 * L is lower unitriangular matrix
8488 * U is upper triangular matrix
8489 * P = P0*P1*...*PK, K=min(M,N)-1,
8490   Pi - permutation matrix for I and Pivots[I]
8491 
8492 INPUT PARAMETERS:
8493     A       -   array[0..M-1, 0..N-1].
8494     M       -   number of rows in matrix A.
8495     N       -   number of columns in matrix A.
8496 
8497 
8498 OUTPUT PARAMETERS:
8499     A       -   matrices L and U in compact form:
8500                 * L is stored under main diagonal
8501                 * U is stored on and above main diagonal
8502     Pivots  -   permutation matrix in compact form.
8503                 array[0..Min(M-1,N-1)].
8504 
8505   ! FREE EDITION OF ALGLIB:
8506   !
8507   ! Free Edition of ALGLIB supports following important features for  this
8508   ! function:
8509   ! * C++ version: x64 SIMD support using C++ intrinsics
8510   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8511   !
8512   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8513   ! Reference Manual in order  to  find  out  how to activate SIMD support
8514   ! in ALGLIB.
8515 
8516   ! COMMERCIAL EDITION OF ALGLIB:
8517   !
8518   ! Commercial Edition of ALGLIB includes following important improvements
8519   ! of this function:
8520   ! * high-performance native backend with same C# interface (C# version)
8521   ! * multithreading support (C++ and C# versions)
8522   ! * hardware vendor (Intel) implementations of linear algebra primitives
8523   !   (C++ and C# versions, x86/x64 platform)
8524   !
8525   ! We recommend you to read 'Working with commercial version' section  of
8526   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8527   ! related features provided by commercial edition of ALGLIB.
8528 
8529   -- ALGLIB routine --
8530      10.01.2010
8531      Bochkanov Sergey
8532 *************************************************************************/
void rmatrixlu(real_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::rmatrixlu(); A is factorized in
    // place and pivot indices are written into 'pivots'. Error handling
    // follows the standard ALGLIB setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::rmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
8554 
8555 /*************************************************************************
8556 LU decomposition of a general complex matrix with row pivoting
8557 
8558 A is represented as A = P*L*U, where:
8559 * L is lower unitriangular matrix
8560 * U is upper triangular matrix
8561 * P = P0*P1*...*PK, K=min(M,N)-1,
8562   Pi - permutation matrix for I and Pivots[I]
8563 
8564 INPUT PARAMETERS:
8565     A       -   array[0..M-1, 0..N-1].
8566     M       -   number of rows in matrix A.
8567     N       -   number of columns in matrix A.
8568 
8569 
8570 OUTPUT PARAMETERS:
8571     A       -   matrices L and U in compact form:
8572                 * L is stored under main diagonal
8573                 * U is stored on and above main diagonal
8574     Pivots  -   permutation matrix in compact form.
8575                 array[0..Min(M-1,N-1)].
8576 
8577   ! FREE EDITION OF ALGLIB:
8578   !
8579   ! Free Edition of ALGLIB supports following important features for  this
8580   ! function:
8581   ! * C++ version: x64 SIMD support using C++ intrinsics
8582   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8583   !
8584   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8585   ! Reference Manual in order  to  find  out  how to activate SIMD support
8586   ! in ALGLIB.
8587 
8588   ! COMMERCIAL EDITION OF ALGLIB:
8589   !
8590   ! Commercial Edition of ALGLIB includes following important improvements
8591   ! of this function:
8592   ! * high-performance native backend with same C# interface (C# version)
8593   ! * multithreading support (C++ and C# versions)
8594   ! * hardware vendor (Intel) implementations of linear algebra primitives
8595   !   (C++ and C# versions, x86/x64 platform)
8596   !
8597   ! We recommend you to read 'Working with commercial version' section  of
8598   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8599   ! related features provided by commercial edition of ALGLIB.
8600 
8601   -- ALGLIB routine --
8602      10.01.2010
8603      Bochkanov Sergey
8604 *************************************************************************/
void cmatrixlu(complex_2d_array &a, const ae_int_t m, const ae_int_t n, integer_1d_array &pivots, const xparams _xparams)
{
    // Wrapper for the C-core alglib_impl::cmatrixlu(); complex analog of
    // rmatrixlu above — A is factorized in place, pivots receives the
    // permutation. Error handling follows the standard ALGLIB
    // setjmp/longjmp trampoline.
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
        // Error path entered via longjmp from the core.
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::cmatrixlu(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
8626 
8627 /*************************************************************************
8628 Cache-oblivious Cholesky decomposition
8629 
8630 The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
8631 definite matrix. The result of an algorithm is a representation  of  A  as
8632 A=U'*U  or A=L*L' (here X' denotes conj(X^T)).
8633 
8634 INPUT PARAMETERS:
8635     A       -   upper or lower triangle of a factorized matrix.
8636                 array with elements [0..N-1, 0..N-1].
8637     N       -   size of matrix A.
8638     IsUpper -   if IsUpper=True, then A contains an upper triangle of
8639                 a symmetric matrix, otherwise A contains a lower one.
8640 
8641 OUTPUT PARAMETERS:
8642     A       -   the result of factorization. If IsUpper=True, then
8643                 the upper triangle contains matrix U, so that A = U'*U,
8644                 and the elements below the main diagonal are not modified.
8645                 Similarly, if IsUpper = False.
8646 
8647 RESULT:
8648     If  the  matrix  is  positive-definite,  the  function  returns  True.
8649     Otherwise, the function returns False. Contents of A is not determined
8650     in such case.
8651 
8652   ! FREE EDITION OF ALGLIB:
8653   !
8654   ! Free Edition of ALGLIB supports following important features for  this
8655   ! function:
8656   ! * C++ version: x64 SIMD support using C++ intrinsics
8657   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8658   !
8659   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8660   ! Reference Manual in order  to  find  out  how to activate SIMD support
8661   ! in ALGLIB.
8662 
8663   ! COMMERCIAL EDITION OF ALGLIB:
8664   !
8665   ! Commercial Edition of ALGLIB includes following important improvements
8666   ! of this function:
8667   ! * high-performance native backend with same C# interface (C# version)
8668   ! * multithreading support (C++ and C# versions)
8669   ! * hardware vendor (Intel) implementations of linear algebra primitives
8670   !   (C++ and C# versions, x86/x64 platform)
8671   !
8672   ! We recommend you to read 'Working with commercial version' section  of
8673   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8674   ! related features provided by commercial edition of ALGLIB.
8675 
8676   -- ALGLIB routine --
8677      15.12.2009-22.01.2018
8678      Bochkanov Sergey
8679 *************************************************************************/
hpdmatrixcholesky(complex_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)8680 bool hpdmatrixcholesky(complex_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
8681 {
8682     jmp_buf _break_jump;
8683     alglib_impl::ae_state _alglib_env_state;
8684     alglib_impl::ae_state_init(&_alglib_env_state);
8685     if( setjmp(_break_jump) )
8686     {
8687 #if !defined(AE_NO_EXCEPTIONS)
8688         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8689 #else
8690         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8691         return 0;
8692 #endif
8693     }
8694     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8695     if( _xparams.flags!=0x0 )
8696         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8697     ae_bool result = alglib_impl::hpdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
8698     alglib_impl::ae_state_clear(&_alglib_env_state);
8699     return *(reinterpret_cast<bool*>(&result));
8700 }
8701 
8702 /*************************************************************************
8703 Cache-oblivious Cholesky decomposition
8704 
8705 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
8706 definite matrix. The result of an algorithm is a representation  of  A  as
8707 A=U^T*U  or A=L*L^T
8708 
8709 INPUT PARAMETERS:
8710     A       -   upper or lower triangle of a factorized matrix.
8711                 array with elements [0..N-1, 0..N-1].
8712     N       -   size of matrix A.
8713     IsUpper -   if IsUpper=True, then A contains an upper triangle of
8714                 a symmetric matrix, otherwise A contains a lower one.
8715 
8716 OUTPUT PARAMETERS:
8717     A       -   the result of factorization. If IsUpper=True, then
8718                 the upper triangle contains matrix U, so that A = U^T*U,
8719                 and the elements below the main diagonal are not modified.
8720                 Similarly, if IsUpper = False.
8721 
8722 RESULT:
8723     If  the  matrix  is  positive-definite,  the  function  returns  True.
8724     Otherwise, the function returns False. Contents of A is not determined
8725     in such case.
8726 
8727   ! FREE EDITION OF ALGLIB:
8728   !
8729   ! Free Edition of ALGLIB supports following important features for  this
8730   ! function:
8731   ! * C++ version: x64 SIMD support using C++ intrinsics
8732   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
8733   !
8734   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
8735   ! Reference Manual in order  to  find  out  how to activate SIMD support
8736   ! in ALGLIB.
8737 
8738   ! COMMERCIAL EDITION OF ALGLIB:
8739   !
8740   ! Commercial Edition of ALGLIB includes following important improvements
8741   ! of this function:
8742   ! * high-performance native backend with same C# interface (C# version)
8743   ! * multithreading support (C++ and C# versions)
8744   ! * hardware vendor (Intel) implementations of linear algebra primitives
8745   !   (C++ and C# versions, x86/x64 platform)
8746   !
8747   ! We recommend you to read 'Working with commercial version' section  of
8748   ! ALGLIB Reference Manual in order to find out how to  use  performance-
8749   ! related features provided by commercial edition of ALGLIB.
8750 
8751   -- ALGLIB routine --
8752      15.12.2009
8753      Bochkanov Sergey
8754 *************************************************************************/
spdmatrixcholesky(real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)8755 bool spdmatrixcholesky(real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
8756 {
8757     jmp_buf _break_jump;
8758     alglib_impl::ae_state _alglib_env_state;
8759     alglib_impl::ae_state_init(&_alglib_env_state);
8760     if( setjmp(_break_jump) )
8761     {
8762 #if !defined(AE_NO_EXCEPTIONS)
8763         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8764 #else
8765         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8766         return 0;
8767 #endif
8768     }
8769     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8770     if( _xparams.flags!=0x0 )
8771         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8772     ae_bool result = alglib_impl::spdmatrixcholesky(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
8773     alglib_impl::ae_state_clear(&_alglib_env_state);
8774     return *(reinterpret_cast<bool*>(&result));
8775 }
8776 
8777 /*************************************************************************
Update of Cholesky decomposition: rank-1 update to original A. This is the
convenience (non-buffered) version; a "buffered" variant which reuses a
caller-supplied buffer between calls is available separately.
8781 
8782 This function uses internally allocated buffer which is not saved  between
8783 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
8784 we  recommend   you   to   use   "buffered"   version   of  this function:
8785 SPDMatrixCholeskyUpdateAdd1Buf().
8786 
8787 INPUT PARAMETERS:
8788     A       -   upper or lower Cholesky factor.
8789                 array with elements [0..N-1, 0..N-1].
8790                 Exception is thrown if array size is too small.
8791     N       -   size of matrix A, N>0
8792     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8793                 otherwise A contains a lower one.
8794     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
8795                 Exception is thrown if array size is too small.
    (NOTE: unlike SPDMatrixCholeskyUpdateAdd1Buf(), this function does NOT
     accept a BufR argument; a temporary buffer is allocated internally on
     each call.)
8799 
8800 OUTPUT PARAMETERS:
8801     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8802                 triangle contains matrix U, and the elements below the main
8803                 diagonal are not modified. Similarly, if IsUpper = False.
8804 
8805 NOTE: this function always succeeds, so it does not return completion code
8806 
8807 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
8808       for presence of infinities or NAN's.
8809 
8810   -- ALGLIB --
8811      03.02.2014
8812      Sergey Bochkanov
8813 *************************************************************************/
spdmatrixcholeskyupdateadd1(const real_2d_array & a,const ae_int_t n,const bool isupper,const real_1d_array & u,const xparams _xparams)8814 void spdmatrixcholeskyupdateadd1(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &u, const xparams _xparams)
8815 {
8816     jmp_buf _break_jump;
8817     alglib_impl::ae_state _alglib_env_state;
8818     alglib_impl::ae_state_init(&_alglib_env_state);
8819     if( setjmp(_break_jump) )
8820     {
8821 #if !defined(AE_NO_EXCEPTIONS)
8822         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8823 #else
8824         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8825         return;
8826 #endif
8827     }
8828     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8829     if( _xparams.flags!=0x0 )
8830         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8831     alglib_impl::spdmatrixcholeskyupdateadd1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), &_alglib_env_state);
8832     alglib_impl::ae_state_clear(&_alglib_env_state);
8833     return;
8834 }
8835 
8836 /*************************************************************************
8837 Update of Cholesky decomposition: "fixing" some variables.
8838 
8839 This function uses internally allocated buffer which is not saved  between
8840 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
8841 we  recommend   you   to   use   "buffered"   version   of  this function:
8842 SPDMatrixCholeskyUpdateFixBuf().
8843 
8844 "FIXING" EXPLAINED:
8845 
8846     Suppose we have N*N positive definite matrix A. "Fixing" some variable
8847     means filling corresponding row/column of  A  by  zeros,  and  setting
8848     diagonal element to 1.
8849 
8850     For example, if we fix 2nd variable in 4*4 matrix A, it becomes Af:
8851 
8852         ( A00  A01  A02  A03 )      ( Af00  0   Af02 Af03 )
8853         ( A10  A11  A12  A13 )      (  0    1    0    0   )
8854         ( A20  A21  A22  A23 )  =>  ( Af20  0   Af22 Af23 )
8855         ( A30  A31  A32  A33 )      ( Af30  0   Af32 Af33 )
8856 
8857     If we have Cholesky decomposition of A, it must be recalculated  after
8858     variables were  fixed.  However,  it  is  possible  to  use  efficient
8859     algorithm, which needs O(K*N^2)  time  to  "fix"  K  variables,  given
8860     Cholesky decomposition of original, "unfixed" A.
8861 
8862 INPUT PARAMETERS:
8863     A       -   upper or lower Cholesky factor.
8864                 array with elements [0..N-1, 0..N-1].
8865                 Exception is thrown if array size is too small.
8866     N       -   size of matrix A, N>0
8867     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8868                 otherwise A contains a lower one.
8869     Fix     -   array[N], I-th element is True if I-th  variable  must  be
8870                 fixed. Exception is thrown if array size is too small.
    (NOTE: unlike SPDMatrixCholeskyUpdateFixBuf(), this function does NOT
     accept a BufR argument; a temporary buffer is allocated internally on
     each call.)
8874 
8875 OUTPUT PARAMETERS:
8876     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8877                 triangle contains matrix U, and the elements below the main
8878                 diagonal are not modified. Similarly, if IsUpper = False.
8879 
8880 NOTE: this function always succeeds, so it does not return completion code
8881 
8882 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
8883       for presence of infinities or NAN's.
8884 
8885 NOTE: this  function  is  efficient  only  for  moderate amount of updated
8886       variables - say, 0.1*N or 0.3*N. For larger amount of  variables  it
8887       will  still  work,  but  you  may  get   better   performance   with
8888       straightforward Cholesky.
8889 
8890   -- ALGLIB --
8891      03.02.2014
8892      Sergey Bochkanov
8893 *************************************************************************/
spdmatrixcholeskyupdatefix(const real_2d_array & a,const ae_int_t n,const bool isupper,const boolean_1d_array & fix,const xparams _xparams)8894 void spdmatrixcholeskyupdatefix(const real_2d_array &a, const ae_int_t n, const bool isupper, const boolean_1d_array &fix, const xparams _xparams)
8895 {
8896     jmp_buf _break_jump;
8897     alglib_impl::ae_state _alglib_env_state;
8898     alglib_impl::ae_state_init(&_alglib_env_state);
8899     if( setjmp(_break_jump) )
8900     {
8901 #if !defined(AE_NO_EXCEPTIONS)
8902         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8903 #else
8904         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8905         return;
8906 #endif
8907     }
8908     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8909     if( _xparams.flags!=0x0 )
8910         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8911     alglib_impl::spdmatrixcholeskyupdatefix(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(fix.c_ptr()), &_alglib_env_state);
8912     alglib_impl::ae_state_clear(&_alglib_env_state);
8913     return;
8914 }
8915 
8916 /*************************************************************************
8917 Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
8918 version which uses preallocated buffer which is saved  between  subsequent
8919 function calls.
8920 
8921 See comments for SPDMatrixCholeskyUpdateAdd1() for more information.
8922 
8923 INPUT PARAMETERS:
8924     A       -   upper or lower Cholesky factor.
8925                 array with elements [0..N-1, 0..N-1].
8926                 Exception is thrown if array size is too small.
8927     N       -   size of matrix A, N>0
8928     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8929                 otherwise A contains a lower one.
8930     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
8931                 Exception is thrown if array size is too small.
8932     BufR    -   possibly preallocated  buffer;  automatically  resized  if
8933                 needed. It is recommended to  reuse  this  buffer  if  you
8934                 perform a lot of subsequent decompositions.
8935 
8936 OUTPUT PARAMETERS:
8937     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8938                 triangle contains matrix U, and the elements below the main
8939                 diagonal are not modified. Similarly, if IsUpper = False.
8940 
8941   -- ALGLIB --
8942      03.02.2014
8943      Sergey Bochkanov
8944 *************************************************************************/
spdmatrixcholeskyupdateadd1buf(const real_2d_array & a,const ae_int_t n,const bool isupper,const real_1d_array & u,real_1d_array & bufr,const xparams _xparams)8945 void spdmatrixcholeskyupdateadd1buf(const real_2d_array &a, const ae_int_t n, const bool isupper, const real_1d_array &u, real_1d_array &bufr, const xparams _xparams)
8946 {
8947     jmp_buf _break_jump;
8948     alglib_impl::ae_state _alglib_env_state;
8949     alglib_impl::ae_state_init(&_alglib_env_state);
8950     if( setjmp(_break_jump) )
8951     {
8952 #if !defined(AE_NO_EXCEPTIONS)
8953         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8954 #else
8955         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8956         return;
8957 #endif
8958     }
8959     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8960     if( _xparams.flags!=0x0 )
8961         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8962     alglib_impl::spdmatrixcholeskyupdateadd1buf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufr.c_ptr()), &_alglib_env_state);
8963     alglib_impl::ae_state_clear(&_alglib_env_state);
8964     return;
8965 }
8966 
8967 /*************************************************************************
8968 Update of Cholesky  decomposition:  "fixing"  some  variables.  "Buffered"
8969 version which uses preallocated buffer which is saved  between  subsequent
8970 function calls.
8971 
8972 See comments for SPDMatrixCholeskyUpdateFix() for more information.
8973 
8974 INPUT PARAMETERS:
8975     A       -   upper or lower Cholesky factor.
8976                 array with elements [0..N-1, 0..N-1].
8977                 Exception is thrown if array size is too small.
8978     N       -   size of matrix A, N>0
8979     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
8980                 otherwise A contains a lower one.
8981     Fix     -   array[N], I-th element is True if I-th  variable  must  be
8982                 fixed. Exception is thrown if array size is too small.
8983     BufR    -   possibly preallocated  buffer;  automatically  resized  if
8984                 needed. It is recommended to  reuse  this  buffer  if  you
8985                 perform a lot of subsequent decompositions.
8986 
8987 OUTPUT PARAMETERS:
8988     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
8989                 triangle contains matrix U, and the elements below the main
8990                 diagonal are not modified. Similarly, if IsUpper = False.
8991 
8992   -- ALGLIB --
8993      03.02.2014
8994      Sergey Bochkanov
8995 *************************************************************************/
spdmatrixcholeskyupdatefixbuf(const real_2d_array & a,const ae_int_t n,const bool isupper,const boolean_1d_array & fix,real_1d_array & bufr,const xparams _xparams)8996 void spdmatrixcholeskyupdatefixbuf(const real_2d_array &a, const ae_int_t n, const bool isupper, const boolean_1d_array &fix, real_1d_array &bufr, const xparams _xparams)
8997 {
8998     jmp_buf _break_jump;
8999     alglib_impl::ae_state _alglib_env_state;
9000     alglib_impl::ae_state_init(&_alglib_env_state);
9001     if( setjmp(_break_jump) )
9002     {
9003 #if !defined(AE_NO_EXCEPTIONS)
9004         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9005 #else
9006         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9007         return;
9008 #endif
9009     }
9010     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9011     if( _xparams.flags!=0x0 )
9012         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9013     alglib_impl::spdmatrixcholeskyupdatefixbuf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, const_cast<alglib_impl::ae_vector*>(fix.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufr.c_ptr()), &_alglib_env_state);
9014     alglib_impl::ae_state_clear(&_alglib_env_state);
9015     return;
9016 }
9017 
9018 /*************************************************************************
9019 Sparse LU decomposition with column pivoting for sparsity and row pivoting
9020 for stability. Input must be square sparse matrix stored in CRS format.
9021 
9022 The algorithm  computes  LU  decomposition  of  a  general  square  matrix
9023 (rectangular ones are not supported). The result  of  an  algorithm  is  a
9024 representation of A as A = P*L*U*Q, where:
9025 * L is lower unitriangular matrix
9026 * U is upper triangular matrix
9027 * P = P0*P1*...*PK, K=N-1, Pi - permutation matrix for I and P[I]
9028 * Q = QK*...*Q1*Q0, K=N-1, Qi - permutation matrix for I and Q[I]
9029 
9030 This function pivots columns for higher sparsity, and then pivots rows for
9031 stability (larger element at the diagonal).
9032 
9033 INPUT PARAMETERS:
9034     A       -   sparse NxN matrix in CRS format. An exception is generated
9035                 if matrix is non-CRS or non-square.
9036     PivotType-  pivoting strategy:
9037                 * 0 for best pivoting available (2 in current version)
9038                 * 1 for row-only pivoting (NOT RECOMMENDED)
9039                 * 2 for complete pivoting which produces most sparse outputs
9040 
9041 OUTPUT PARAMETERS:
9042     A       -   the result of factorization, matrices L and U stored in
9043                 compact form using CRS sparse storage format:
9044                 * lower unitriangular L is stored strictly under main diagonal
                * upper triangular U is stored ON and ABOVE main diagonal
9046     P       -   row permutation matrix in compact form, array[N]
9047     Q       -   col permutation matrix in compact form, array[N]
9048 
9049 This function always succeeds, i.e. it ALWAYS returns valid factorization,
9050 but for your convenience it also returns  boolean  value  which  helps  to
9051 detect symbolically degenerate matrices:
9052 * function returns TRUE, if the matrix was factorized AND symbolically
9053   non-degenerate
9054 * function returns FALSE, if the matrix was factorized but U has strictly
9055   zero elements at the diagonal (the factorization is returned anyway).
9056 
9057 
9058   -- ALGLIB routine --
9059      03.09.2018
9060      Bochkanov Sergey
9061 *************************************************************************/
sparselu(const sparsematrix & a,const ae_int_t pivottype,integer_1d_array & p,integer_1d_array & q,const xparams _xparams)9062 bool sparselu(const sparsematrix &a, const ae_int_t pivottype, integer_1d_array &p, integer_1d_array &q, const xparams _xparams)
9063 {
9064     jmp_buf _break_jump;
9065     alglib_impl::ae_state _alglib_env_state;
9066     alglib_impl::ae_state_init(&_alglib_env_state);
9067     if( setjmp(_break_jump) )
9068     {
9069 #if !defined(AE_NO_EXCEPTIONS)
9070         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9071 #else
9072         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9073         return 0;
9074 #endif
9075     }
9076     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9077     if( _xparams.flags!=0x0 )
9078         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9079     ae_bool result = alglib_impl::sparselu(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), pivottype, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), const_cast<alglib_impl::ae_vector*>(q.c_ptr()), &_alglib_env_state);
9080     alglib_impl::ae_state_clear(&_alglib_env_state);
9081     return *(reinterpret_cast<bool*>(&result));
9082 }
9083 
9084 /*************************************************************************
Sparse Cholesky decomposition for a skyline matrix using an in-place algorithm
9086 without allocating additional storage.
9087 
9088 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
9089 definite sparse matrix. The result of an algorithm is a representation  of
9090 A as A=U^T*U or A=L*L^T
9091 
9092 This function allows to perform very efficient decomposition of low-profile
9093 matrices (average bandwidth is ~5-10 elements). For larger matrices it  is
9094 recommended to use supernodal Cholesky decomposition: SparseCholeskyP() or
9095 SparseCholeskyAnalyze()/SparseCholeskyFactorize().
9096 
9097 INPUT PARAMETERS:
9098     A       -   sparse matrix in skyline storage (SKS) format.
9099     N       -   size of matrix A (can be smaller than actual size of A)
9100     IsUpper -   if IsUpper=True, then factorization is performed on  upper
                triangle. Another triangle is ignored (it may contain some
9102                 data, but it is not changed).
9103 
9104 
9105 OUTPUT PARAMETERS:
9106     A       -   the result of factorization, stored in SKS. If IsUpper=True,
9107                 then the upper  triangle  contains  matrix  U,  such  that
9108                 A = U^T*U. Lower triangle is not changed.
9109                 Similarly, if IsUpper = False. In this case L is returned,
9110                 and we have A = L*(L^T).
9111                 Note that THIS function does not  perform  permutation  of
9112                 rows to reduce bandwidth.
9113 
9114 RESULT:
9115     If  the  matrix  is  positive-definite,  the  function  returns  True.
9116     Otherwise, the function returns False. Contents of A is not determined
9117     in such case.
9118 
9119 NOTE: for  performance  reasons  this  function  does NOT check that input
9120       matrix  includes  only  finite  values. It is your responsibility to
9121       make sure that there are no infinite or NAN values in the matrix.
9122 
9123   -- ALGLIB routine --
9124      16.01.2014
9125      Bochkanov Sergey
9126 *************************************************************************/
sparsecholeskyskyline(const sparsematrix & a,const ae_int_t n,const bool isupper,const xparams _xparams)9127 bool sparsecholeskyskyline(const sparsematrix &a, const ae_int_t n, const bool isupper, const xparams _xparams)
9128 {
9129     jmp_buf _break_jump;
9130     alglib_impl::ae_state _alglib_env_state;
9131     alglib_impl::ae_state_init(&_alglib_env_state);
9132     if( setjmp(_break_jump) )
9133     {
9134 #if !defined(AE_NO_EXCEPTIONS)
9135         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9136 #else
9137         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9138         return 0;
9139 #endif
9140     }
9141     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9142     if( _xparams.flags!=0x0 )
9143         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9144     ae_bool result = alglib_impl::sparsecholeskyskyline(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
9145     alglib_impl::ae_state_clear(&_alglib_env_state);
9146     return *(reinterpret_cast<bool*>(&result));
9147 }
9148 
9149 /*************************************************************************
9150 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage,
9151 without rows/cols permutation.
9152 
9153 This function is the most convenient (less parameters to specify), although
9154 less efficient, version of sparse Cholesky.
9155 
9156 Internally it:
9157 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
9158   phase with no permutation being configured.
9159 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
9160   the factorization
9161 
9162 Following alternatives may result in better performance:
9163 * using SparseCholeskyP(), which selects best  pivoting  available,  which
9164   almost always results in improved sparsity and cache locality
9165 * using  SparseCholeskyAnalyze() and SparseCholeskyFactorize()   functions
9166   directly,  which  may  improve  performance of repetitive factorizations
9167   with same sparsity patterns.
9168 
9169 The latter also allows one to perform  LDLT  factorization  of  indefinite
9170 matrix (one with strictly diagonal D, which is known  to  be  stable  only
9171 in few special cases, like quasi-definite matrices).
9172 
9173 INPUT PARAMETERS:
9174     A       -   a square NxN sparse matrix, stored in any storage format.
9175     IsUpper -   if IsUpper=True, then factorization is performed on  upper
9176                 triangle.  Another triangle is ignored on  input,  dropped
9177                 on output. Similarly, if IsUpper=False, the lower triangle
9178                 is processed.
9179 
9180 OUTPUT PARAMETERS:
9181     A       -   the result of factorization, stored in CRS format:
9182                 * if IsUpper=True, then the upper triangle contains matrix
9183                   U such  that  A = U^T*U and the lower triangle is empty.
9184                 * similarly, if IsUpper=False, then lower triangular L  is
9185                   returned and we have A = L*(L^T).
9186                 Note that THIS function does not  perform  permutation  of
9187                 the rows to reduce fill-in.
9188 
9189 RESULT:
9190     If  the  matrix  is  positive-definite,  the  function  returns  True.
9191     Otherwise, the function returns False.  Contents  of  A  is  undefined
9192     in such case.
9193 
9194 NOTE: for  performance  reasons  this  function  does NOT check that input
9195       matrix  includes  only  finite  values. It is your responsibility to
9196       make sure that there are no infinite or NAN values in the matrix.
9197 
9198   -- ALGLIB routine --
9199      16.09.2020
9200      Bochkanov Sergey
9201 *************************************************************************/
sparsecholesky(const sparsematrix & a,const bool isupper,const xparams _xparams)9202 bool sparsecholesky(const sparsematrix &a, const bool isupper, const xparams _xparams)
9203 {
9204     jmp_buf _break_jump;
9205     alglib_impl::ae_state _alglib_env_state;
9206     alglib_impl::ae_state_init(&_alglib_env_state);
9207     if( setjmp(_break_jump) )
9208     {
9209 #if !defined(AE_NO_EXCEPTIONS)
9210         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9211 #else
9212         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9213         return 0;
9214 #endif
9215     }
9216     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9217     if( _xparams.flags!=0x0 )
9218         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9219     ae_bool result = alglib_impl::sparsecholesky(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, &_alglib_env_state);
9220     alglib_impl::ae_state_clear(&_alglib_env_state);
9221     return *(reinterpret_cast<bool*>(&result));
9222 }
9223 
9224 /*************************************************************************
9225 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage
9226 format, with performance-enhancing permutation of rows/cols.
9227 
The present version is configured to perform a supernodal permutation with a
sparsity-reducing (fill-in reducing) ordering.
9230 
9231 This function is a wrapper around generic sparse  decomposition  functions
9232 that internally:
9233 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
9234   phase with best available permutation being configured.
9235 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
9236   the factorization.
9237 
9238 NOTE: using  SparseCholeskyAnalyze() and SparseCholeskyFactorize() directly
9239       may improve  performance  of  repetitive  factorizations  with  same
9240       sparsity patterns. It also allows one to perform  LDLT factorization
9241       of  indefinite  matrix  -  a factorization with strictly diagonal D,
9242       which  is  known to be stable only in few special cases, like quasi-
9243       definite matrices.
9244 
9245 INPUT PARAMETERS:
9246     A       -   a square NxN sparse matrix, stored in any storage format.
9247     IsUpper -   if IsUpper=True, then factorization is performed on  upper
9248                 triangle.  Another triangle is ignored on  input,  dropped
9249                 on output. Similarly, if IsUpper=False, the lower triangle
9250                 is processed.
9251 
9252 OUTPUT PARAMETERS:
9253     A       -   the result of factorization, stored in CRS format:
9254                 * if IsUpper=True, then the upper triangle contains matrix
9255                   U such  that  A = U^T*U and the lower triangle is empty.
9256                 * similarly, if IsUpper=False, then lower triangular L  is
9257                   returned and we have A = L*(L^T).
9258     P       -   a row/column permutation, a product of P0*P1*...*Pk, k=N-1,
9259                 with Pi being permutation of rows/cols I and P[I]
9260 
9261 RESULT:
9262     If  the  matrix  is  positive-definite,  the  function  returns  True.
9263     Otherwise, the function returns False.  Contents  of  A  is  undefined
9264     in such case.
9265 
9266 NOTE: for  performance  reasons  this  function  does NOT check that input
9267       matrix  includes  only  finite  values. It is your responsibility to
9268       make sure that there are no infinite or NAN values in the matrix.
9269 
9270   -- ALGLIB routine --
9271      16.09.2020
9272      Bochkanov Sergey
9273 *************************************************************************/
sparsecholeskyp(const sparsematrix & a,const bool isupper,integer_1d_array & p,const xparams _xparams)9274 bool sparsecholeskyp(const sparsematrix &a, const bool isupper, integer_1d_array &p, const xparams _xparams)
9275 {
9276     jmp_buf _break_jump;
9277     alglib_impl::ae_state _alglib_env_state;
9278     alglib_impl::ae_state_init(&_alglib_env_state);
9279     if( setjmp(_break_jump) )
9280     {
9281 #if !defined(AE_NO_EXCEPTIONS)
9282         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9283 #else
9284         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9285         return 0;
9286 #endif
9287     }
9288     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9289     if( _xparams.flags!=0x0 )
9290         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9291     ae_bool result = alglib_impl::sparsecholeskyp(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, const_cast<alglib_impl::ae_vector*>(p.c_ptr()), &_alglib_env_state);
9292     alglib_impl::ae_state_clear(&_alglib_env_state);
9293     return *(reinterpret_cast<bool*>(&result));
9294 }
9295 
9296 /*************************************************************************
9297 Sparse Cholesky/LDLT decomposition: symbolic analysis phase.
9298 
9299 This function is a part of the 'expert' sparse Cholesky API:
9300 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
9301   matrix to be factorized into internal storage
9302 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
9303   with lower bounds on pivot magnitudes and additional overflow safeguards
9304 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
9305   precomputed symbolic analysis and internally stored matrix - and outputs
9306   result
9307 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
9308   pattern into internal storage so  one  may  reuse  previously  allocated
9309   temporaries and previously performed symbolic analysis
9310 
9311 This specific function performs preliminary analysis of the  Cholesky/LDLT
9312 factorization. It allows to choose  different  permutation  types  and  to
9313 choose between classic Cholesky and  indefinite  LDLT  factorization  (the
9314 latter is computed with strictly diagonal D, i.e.  without  Bunch-Kauffman
9315 pivoting).
9316 
9317 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
9318       matrices. However, numerical stability is guaranteed ONLY for a class
9319       of quasi-definite matrices.
9320 
9321 NOTE: all internal processing is performed with lower triangular  matrices
9322       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
9323       triangular storage means  that  one  format  conversion  and/or  one
9324       transposition will be performed  internally  for  the  analysis  and
9325       factorization phases. Thus, highest  performance  is  achieved  when
9326       input is a lower triangular CRS matrix.
9327 
9328 INPUT PARAMETERS:
9329     A           -   sparse square matrix in any sparse storage format.
9330     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
9331                     other one is ignored).
9332     FactType    -   factorization type:
9333                     * 0 for traditional Cholesky of SPD matrix
9334                     * 1 for LDLT decomposition with strictly  diagonal  D,
9335                         which may have non-positive entries.
9336     PermType    -   permutation type:
9337                     *-1 for absence of permutation
9338                     * 0 for best fill-in reducing  permutation  available,
9339                         which is 3 in the current version
9340                     * 1 for supernodal ordering (improves locality and
9341                       performance, does NOT change fill-in factor)
9342                     * 2 for original AMD ordering
9343                     * 3 for  improved  AMD  (approximate  minimum  degree)
9344                         ordering with better  handling  of  matrices  with
9345                         dense rows/columns
9346 
9347 OUTPUT PARAMETERS:
9348     Analysis    -   contains:
9349                     * symbolic analysis of the matrix structure which will
9350                       be used later to guide numerical factorization.
9351                     * specific numeric values loaded into internal  memory
9352                       waiting for the factorization to be performed
9353 
9354 This function fails if and only if the matrix A is symbolically degenerate
9355 i.e. has diagonal element which is exactly zero. In  such  case  False  is
9356 returned, contents of Analysis object is undefined.
9357 
9358   -- ALGLIB routine --
9359      20.09.2020
9360      Bochkanov Sergey
9361 *************************************************************************/
sparsecholeskyanalyze(const sparsematrix & a,const bool isupper,const ae_int_t facttype,const ae_int_t permtype,sparsedecompositionanalysis & analysis,const xparams _xparams)9362 bool sparsecholeskyanalyze(const sparsematrix &a, const bool isupper, const ae_int_t facttype, const ae_int_t permtype, sparsedecompositionanalysis &analysis, const xparams _xparams)
9363 {
9364     jmp_buf _break_jump;
9365     alglib_impl::ae_state _alglib_env_state;
9366     alglib_impl::ae_state_init(&_alglib_env_state);
9367     if( setjmp(_break_jump) )
9368     {
9369 #if !defined(AE_NO_EXCEPTIONS)
9370         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9371 #else
9372         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9373         return 0;
9374 #endif
9375     }
9376     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9377     if( _xparams.flags!=0x0 )
9378         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9379     ae_bool result = alglib_impl::sparsecholeskyanalyze(const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, facttype, permtype, const_cast<alglib_impl::sparsedecompositionanalysis*>(analysis.c_ptr()), &_alglib_env_state);
9380     alglib_impl::ae_state_clear(&_alglib_env_state);
9381     return *(reinterpret_cast<bool*>(&result));
9382 }
9383 
9384 /*************************************************************************
9385 Sparse Cholesky decomposition: numerical analysis phase.
9386 
9387 This function is a part of the 'expert' sparse Cholesky API:
9388 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
9389   matrix to be factorized into internal storage
9390 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
9391   with lower bounds on pivot magnitudes and additional overflow safeguards
9392 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
9393   precomputed symbolic analysis and internally stored matrix - and outputs
9394   result
9395 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
9396   pattern into internal storage so  one  may  reuse  previously  allocated
9397   temporaries and previously performed symbolic analysis
9398 
9399 Depending on settings specified during SparseCholeskyAnalyze() call it may
9400 produce classic Cholesky or L*D*LT  decomposition  (with strictly diagonal
9401 D), without permutation or with performance-enhancing permutation P.
9402 
9403 NOTE: all internal processing is performed with lower triangular  matrices
9404       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
9405       triangular storage means  that  one  format  conversion  and/or  one
9406       transposition will be performed  internally  for  the  analysis  and
9407       factorization phases. Thus, highest  performance  is  achieved  when
9408       input is a lower triangular CRS matrix, and lower triangular  output
9409       is requested.
9410 
9411 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
9412       matrices. However, numerical stability is guaranteed ONLY for a class
9413       of quasi-definite matrices.
9414 
9415 INPUT PARAMETERS:
9416     Analysis    -   prior analysis with internally stored matrix which will
9417                     be factorized
9418     NeedUpper   -   whether upper triangular or lower triangular output is
9419                     needed
9420 
9421 OUTPUT PARAMETERS:
9422     A           -   Cholesky decomposition of A stored in lower triangular
9423                     CRS format, i.e. A=L*L' (or upper triangular CRS, with
9424                     A=U'*U, depending on NeedUpper parameter).
9425     D           -   array[N], diagonal factor. If no diagonal  factor  was
9426                     required during analysis  phase,  still  returned  but
9427                     filled with 1's
9428     P           -   array[N], pivots. Permutation matrix P is a product of
9429                     P(0)*P(1)*...*P(N-1), where P(i) is a  permutation  of
9430                     row/col I and P[I] (with P[I]>=I).
9431                     If no permutation was requested during analysis phase,
9432                     still returned but filled with identity permutation.
9433 
9434 The function returns True  when  factorization  resulted  in nondegenerate
9435 matrix. False is returned when factorization fails (Cholesky factorization
9436 of indefinite matrix) or LDLT factorization has exactly zero  elements  at
9437 the diagonal. In the latter case contents of A, D and P is undefined.
9438 
9439 The analysis object is not changed during  the  factorization.  Subsequent
9440 calls to SparseCholeskyFactorize() will result in same factorization being
9441 performed one more time.
9442 
9443   -- ALGLIB routine --
9444      20.09.2020
9445      Bochkanov Sergey
9446 *************************************************************************/
sparsecholeskyfactorize(const sparsedecompositionanalysis & analysis,const bool needupper,sparsematrix & a,real_1d_array & d,integer_1d_array & p,const xparams _xparams)9447 bool sparsecholeskyfactorize(const sparsedecompositionanalysis &analysis, const bool needupper, sparsematrix &a, real_1d_array &d, integer_1d_array &p, const xparams _xparams)
9448 {
9449     jmp_buf _break_jump;
9450     alglib_impl::ae_state _alglib_env_state;
9451     alglib_impl::ae_state_init(&_alglib_env_state);
9452     if( setjmp(_break_jump) )
9453     {
9454 #if !defined(AE_NO_EXCEPTIONS)
9455         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9456 #else
9457         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9458         return 0;
9459 #endif
9460     }
9461     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9462     if( _xparams.flags!=0x0 )
9463         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9464     ae_bool result = alglib_impl::sparsecholeskyfactorize(const_cast<alglib_impl::sparsedecompositionanalysis*>(analysis.c_ptr()), needupper, const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(p.c_ptr()), &_alglib_env_state);
9465     alglib_impl::ae_state_clear(&_alglib_env_state);
9466     return *(reinterpret_cast<bool*>(&result));
9467 }
9468 
9469 /*************************************************************************
9470 Sparse  Cholesky  decomposition:  update  internally  stored  matrix  with
9471 another one with exactly same sparsity pattern.
9472 
9473 This function is a part of the 'expert' sparse Cholesky API:
9474 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
9475   matrix to be factorized into internal storage
9476 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
9477   with lower bounds on pivot magnitudes and additional overflow safeguards
9478 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
9479   precomputed symbolic analysis and internally stored matrix - and outputs
9480   result
9481 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
9482   pattern into internal storage so  one  may  reuse  previously  allocated
9483   temporaries and previously performed symbolic analysis
9484 
9485 This specific function replaces internally stored  numerical  values  with
9486 ones from another sparse matrix (but having exactly same sparsity  pattern
9487 as one that was used for initial SparseCholeskyAnalyze() call).
9488 
9489 NOTE: all internal processing is performed with lower triangular  matrices
9490       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
9491       triangular storage means  that  one  format  conversion  and/or  one
9492       transposition will be performed  internally  for  the  analysis  and
9493       factorization phases. Thus, highest  performance  is  achieved  when
9494       input is a lower triangular CRS matrix.
9495 
9496 INPUT PARAMETERS:
9497     Analysis    -   analysis object
9498     A           -   sparse square matrix in any sparse storage format.  It
9499                     MUST have exactly same sparsity pattern as that of the
9500                     matrix that was passed to SparseCholeskyAnalyze().
9501                     Any difference (missing elements or additional elements)
9502                     may result in unpredictable and undefined  behavior  -
9503                     an algorithm may fail due to memory access violation.
9504     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
9505                     other one is ignored).
9506 
9507 OUTPUT PARAMETERS:
9508     Analysis    -   contains:
9509                     * symbolic analysis of the matrix structure which will
9510                       be used later to guide numerical factorization.
9511                     * specific numeric values loaded into internal  memory
9512                       waiting for the factorization to be performed
9513 
9514   -- ALGLIB routine --
9515      20.09.2020
9516      Bochkanov Sergey
9517 *************************************************************************/
sparsecholeskyreload(const sparsedecompositionanalysis & analysis,const sparsematrix & a,const bool isupper,const xparams _xparams)9518 void sparsecholeskyreload(const sparsedecompositionanalysis &analysis, const sparsematrix &a, const bool isupper, const xparams _xparams)
9519 {
9520     jmp_buf _break_jump;
9521     alglib_impl::ae_state _alglib_env_state;
9522     alglib_impl::ae_state_init(&_alglib_env_state);
9523     if( setjmp(_break_jump) )
9524     {
9525 #if !defined(AE_NO_EXCEPTIONS)
9526         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9527 #else
9528         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9529         return;
9530 #endif
9531     }
9532     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9533     if( _xparams.flags!=0x0 )
9534         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9535     alglib_impl::sparsecholeskyreload(const_cast<alglib_impl::sparsedecompositionanalysis*>(analysis.c_ptr()), const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), isupper, &_alglib_env_state);
9536     alglib_impl::ae_state_clear(&_alglib_env_state);
9537     return;
9538 }
9539 #endif
9540 
9541 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
9542 /*************************************************************************
9543 Singular value decomposition of a bidiagonal matrix (extended algorithm)
9544 
9545 COMMERCIAL EDITION OF ALGLIB:
9546 
9547   ! Commercial version of ALGLIB includes one  important  improvement   of
9548   ! this function, which can be used from C++ and C#:
9549   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
9550   !
9551   ! Intel MKL gives approximately constant  (with  respect  to  number  of
9552   ! worker threads) acceleration factor which depends on CPU  being  used,
9553   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
9554   ! comparison.
9555   !
9556   ! Generally, commercial ALGLIB is several times faster than  open-source
9557   ! generic C edition, and many times faster than open-source C# edition.
9558   !
9559   ! Multithreaded acceleration is NOT supported for this function.
9560   !
9561   ! We recommend you to read 'Working with commercial version' section  of
9562   ! ALGLIB Reference Manual in order to find out how to  use  performance-
9563   ! related features provided by commercial edition of ALGLIB.
9564 
9565 The algorithm performs the singular value decomposition  of  a  bidiagonal
9566 matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
9567 orthogonal matrices, S - diagonal matrix with non-negative elements on the
9568 main diagonal, in descending order.
9569 
9570 The  algorithm  finds  singular  values.  In  addition,  the algorithm can
9571 calculate  matrices  Q  and P (more precisely, not the matrices, but their
9572 product  with  given  matrices U and VT - U*Q and (P^T)*VT)).  Of  course,
9573 matrices U and VT can be of any type, including identity. Furthermore, the
9574 algorithm can calculate Q'*C (this product is calculated more  effectively
9575 than U*Q,  because  this calculation operates with rows instead  of matrix
9576 columns).
9577 
9578 The feature of the algorithm is its ability to find  all  singular  values
9579 including those which are arbitrarily close to 0  with  relative  accuracy
9580 close to  machine precision. If the parameter IsFractionalAccuracyRequired
9581 is set to True, all singular values will have high relative accuracy close
9582 to machine precision. If the parameter is set to False, only  the  biggest
9583 singular value will have relative accuracy  close  to  machine  precision.
9584 The absolute error of other singular values is equal to the absolute error
9585 of the biggest singular value.
9586 
9587 Input parameters:
9588     D       -   main diagonal of matrix B.
9589                 Array whose index ranges within [0..N-1].
9590     E       -   superdiagonal (or subdiagonal) of matrix B.
9591                 Array whose index ranges within [0..N-2].
9592     N       -   size of matrix B.
9593     IsUpper -   True, if the matrix is upper bidiagonal.
9594     IsFractionalAccuracyRequired -
9595                 THIS PARAMETER IS IGNORED SINCE ALGLIB 3.5.0
9596                 SINGULAR VALUES ARE ALWAYS SEARCHED WITH HIGH ACCURACY.
9597     U       -   matrix to be multiplied by Q.
9598                 Array whose indexes range within [0..NRU-1, 0..N-1].
9599                 The matrix can be bigger, in that case only the  submatrix
9600                 [0..NRU-1, 0..N-1] will be multiplied by Q.
9601     NRU     -   number of rows in matrix U.
9602     C       -   matrix to be multiplied by Q'.
9603                 Array whose indexes range within [0..N-1, 0..NCC-1].
9604                 The matrix can be bigger, in that case only the  submatrix
9605                 [0..N-1, 0..NCC-1] will be multiplied by Q'.
9606     NCC     -   number of columns in matrix C.
9607     VT      -   matrix to be multiplied by P^T.
9608                 Array whose indexes range within [0..N-1, 0..NCVT-1].
9609                 The matrix can be bigger, in that case only the  submatrix
9610                 [0..N-1, 0..NCVT-1] will be multiplied by P^T.
9611     NCVT    -   number of columns in matrix VT.
9612 
9613 Output parameters:
9614     D       -   singular values of matrix B in descending order.
9615     U       -   if NRU>0, contains matrix U*Q.
9616     VT      -   if NCVT>0, contains matrix (P^T)*VT.
9617     C       -   if NCC>0, contains matrix Q'*C.
9618 
9619 Result:
9620     True, if the algorithm has converged.
9621     False, if the algorithm hasn't converged (rare case).
9622 
9623 NOTE: multiplication U*Q is performed by means of transposition to internal
9624       buffer, multiplication and backward transposition. It helps to avoid
9625       costly columnwise operations and speed-up algorithm.
9626 
9627 Additional information:
9628     The type of convergence is controlled by the internal  parameter  TOL.
9629     If the parameter is greater than 0, the singular values will have
9630     relative accuracy TOL. If TOL<0, the singular values will have
9631     absolute accuracy ABS(TOL)*norm(B).
9632     By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
9633     where Epsilon is the machine precision. It is not  recommended  to  use
9634     TOL less than 10*Epsilon since this will  considerably  slow  down  the
9635     algorithm and may not lead to error decreasing.
9636 
9637 History:
9638     * 31 March, 2007.
9639         changed MAXITR from 6 to 12.
9640 
9641   -- LAPACK routine (version 3.0) --
9642      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
9643      Courant Institute, Argonne National Lab, and Rice University
9644      October 31, 1999.
9645 *************************************************************************/
rmatrixbdsvd(real_1d_array & d,const real_1d_array & e,const ae_int_t n,const bool isupper,const bool isfractionalaccuracyrequired,real_2d_array & u,const ae_int_t nru,real_2d_array & c,const ae_int_t ncc,real_2d_array & vt,const ae_int_t ncvt,const xparams _xparams)9646 bool rmatrixbdsvd(real_1d_array &d, const real_1d_array &e, const ae_int_t n, const bool isupper, const bool isfractionalaccuracyrequired, real_2d_array &u, const ae_int_t nru, real_2d_array &c, const ae_int_t ncc, real_2d_array &vt, const ae_int_t ncvt, const xparams _xparams)
9647 {
9648     jmp_buf _break_jump;
9649     alglib_impl::ae_state _alglib_env_state;
9650     alglib_impl::ae_state_init(&_alglib_env_state);
9651     if( setjmp(_break_jump) )
9652     {
9653 #if !defined(AE_NO_EXCEPTIONS)
9654         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9655 #else
9656         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9657         return 0;
9658 #endif
9659     }
9660     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9661     if( _xparams.flags!=0x0 )
9662         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9663     ae_bool result = alglib_impl::rmatrixbdsvd(const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_vector*>(e.c_ptr()), n, isupper, isfractionalaccuracyrequired, const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), nru, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), ncc, const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), ncvt, &_alglib_env_state);
9664     alglib_impl::ae_state_clear(&_alglib_env_state);
9665     return *(reinterpret_cast<bool*>(&result));
9666 }
9667 #endif
9668 
9669 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
9670 /*************************************************************************
9671 Singular value decomposition of a rectangular matrix.
9672 
9673   ! COMMERCIAL EDITION OF ALGLIB:
9674   !
9675   ! Commercial Edition of ALGLIB includes following important improvements
9676   ! of this function:
9677   ! * high-performance native backend with same C# interface (C# version)
9678   ! * hardware vendor (Intel) implementations of linear algebra primitives
9679   !   (C++ and C# versions, x86/x64 platform)
9680   !
9681   ! We recommend you to read 'Working with commercial version' section  of
9682   ! ALGLIB Reference Manual in order to find out how to  use  performance-
9683   ! related features provided by commercial edition of ALGLIB.
9684 
9685 The algorithm calculates the singular value decomposition of a matrix of
9686 size MxN: A = U * S * V^T
9687 
9688 The algorithm finds the singular values and, optionally, matrices U and V^T.
9689 The algorithm can find both first min(M,N) columns of matrix U and rows of
9690 matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
9691 and NxN respectively).
9692 
9693 Take into account that the subroutine does not return matrix V but V^T.
9694 
9695 Input parameters:
9696     A           -   matrix to be decomposed.
9697                     Array whose indexes range within [0..M-1, 0..N-1].
9698     M           -   number of rows in matrix A.
9699     N           -   number of columns in matrix A.
9700     UNeeded     -   0, 1 or 2. See the description of the parameter U.
9701     VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
9702     AdditionalMemory -
9703                     If the parameter:
9704                      * equals 0, the algorithm doesn't use additional
9705                        memory (lower requirements, lower performance).
9706                      * equals 1, the algorithm uses additional
9707                        memory of size min(M,N)*min(M,N) of real numbers.
9708                        It often speeds up the algorithm.
9709                      * equals 2, the algorithm uses additional
9710                        memory of size M*min(M,N) of real numbers.
9711                        It allows to get a maximum performance.
9712                     The recommended value of the parameter is 2.
9713 
9714 Output parameters:
9715     W           -   contains singular values in descending order.
9716     U           -   if UNeeded=0, U isn't changed, the left singular vectors
9717                     are not calculated.
9718                     if Uneeded=1, U contains left singular vectors (first
9719                     min(M,N) columns of matrix U). Array whose indexes range
9720                     within [0..M-1, 0..Min(M,N)-1].
9721                     if UNeeded=2, U contains matrix U wholly. Array whose
9722                     indexes range within [0..M-1, 0..M-1].
9723     VT          -   if VTNeeded=0, VT isn't changed, the right singular vectors
9724                     are not calculated.
9725                     if VTNeeded=1, VT contains right singular vectors (first
9726                     min(M,N) rows of matrix V^T). Array whose indexes range
9727                     within [0..min(M,N)-1, 0..N-1].
9728                     if VTNeeded=2, VT contains matrix V^T wholly. Array whose
9729                     indexes range within [0..N-1, 0..N-1].
9730 
9731   -- ALGLIB --
9732      Copyright 2005 by Bochkanov Sergey
9733 *************************************************************************/
rmatrixsvd(const real_2d_array & a,const ae_int_t m,const ae_int_t n,const ae_int_t uneeded,const ae_int_t vtneeded,const ae_int_t additionalmemory,real_1d_array & w,real_2d_array & u,real_2d_array & vt,const xparams _xparams)9734 bool rmatrixsvd(const real_2d_array &a, const ae_int_t m, const ae_int_t n, const ae_int_t uneeded, const ae_int_t vtneeded, const ae_int_t additionalmemory, real_1d_array &w, real_2d_array &u, real_2d_array &vt, const xparams _xparams)
9735 {
9736     jmp_buf _break_jump;
9737     alglib_impl::ae_state _alglib_env_state;
9738     alglib_impl::ae_state_init(&_alglib_env_state);
9739     if( setjmp(_break_jump) )
9740     {
9741 #if !defined(AE_NO_EXCEPTIONS)
9742         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9743 #else
9744         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9745         return 0;
9746 #endif
9747     }
9748     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9749     if( _xparams.flags!=0x0 )
9750         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9751     ae_bool result = alglib_impl::rmatrixsvd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), m, n, uneeded, vtneeded, additionalmemory, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), const_cast<alglib_impl::ae_matrix*>(u.c_ptr()), const_cast<alglib_impl::ae_matrix*>(vt.c_ptr()), &_alglib_env_state);
9752     alglib_impl::ae_state_clear(&_alglib_env_state);
9753     return *(reinterpret_cast<bool*>(&result));
9754 }
9755 #endif
9756 
9757 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
9758 /*************************************************************************
9759 Estimate of a matrix condition number (1-norm)
9760 
9761 The algorithm calculates a lower bound of the condition number. In this case,
9762 the algorithm does not return a lower bound of the condition number, but an
9763 inverse number (to avoid an overflow in case of a singular matrix).
9764 
9765 Input parameters:
9766     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
9767     N   -   size of matrix A.
9768 
9769 Result: 1/LowerBound(cond(A))
9770 
9771 NOTE:
9772     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9773     0.0 is returned in such cases.
9774 *************************************************************************/
rmatrixrcond1(const real_2d_array & a,const ae_int_t n,const xparams _xparams)9775 double rmatrixrcond1(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
9776 {
9777     jmp_buf _break_jump;
9778     alglib_impl::ae_state _alglib_env_state;
9779     alglib_impl::ae_state_init(&_alglib_env_state);
9780     if( setjmp(_break_jump) )
9781     {
9782 #if !defined(AE_NO_EXCEPTIONS)
9783         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9784 #else
9785         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9786         return 0;
9787 #endif
9788     }
9789     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9790     if( _xparams.flags!=0x0 )
9791         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9792     double result = alglib_impl::rmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
9793     alglib_impl::ae_state_clear(&_alglib_env_state);
9794     return *(reinterpret_cast<double*>(&result));
9795 }
9796 
9797 /*************************************************************************
9798 Estimate of a matrix condition number (infinity-norm).
9799 
9800 The algorithm calculates a lower bound of the condition number. In this case,
9801 the algorithm does not return a lower bound of the condition number, but an
9802 inverse number (to avoid an overflow in case of a singular matrix).
9803 
9804 Input parameters:
9805     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
9806     N   -   size of matrix A.
9807 
9808 Result: 1/LowerBound(cond(A))
9809 
9810 NOTE:
9811     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9812     0.0 is returned in such cases.
9813 *************************************************************************/
rmatrixrcondinf(const real_2d_array & a,const ae_int_t n,const xparams _xparams)9814 double rmatrixrcondinf(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
9815 {
9816     jmp_buf _break_jump;
9817     alglib_impl::ae_state _alglib_env_state;
9818     alglib_impl::ae_state_init(&_alglib_env_state);
9819     if( setjmp(_break_jump) )
9820     {
9821 #if !defined(AE_NO_EXCEPTIONS)
9822         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9823 #else
9824         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9825         return 0;
9826 #endif
9827     }
9828     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9829     if( _xparams.flags!=0x0 )
9830         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9831     double result = alglib_impl::rmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
9832     alglib_impl::ae_state_clear(&_alglib_env_state);
9833     return *(reinterpret_cast<double*>(&result));
9834 }
9835 
9836 /*************************************************************************
9837 Condition number estimate of a symmetric positive definite matrix.
9838 
9839 The algorithm calculates a lower bound of the condition number. In this case,
9840 the algorithm does not return a lower bound of the condition number, but an
9841 inverse number (to avoid an overflow in case of a singular matrix).
9842 
9843 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
9844 matrices are equal, so the algorithm doesn't take into account the
9845 differences between these types of norms.
9846 
9847 Input parameters:
9848     A       -   symmetric positive definite matrix which is given by its
9849                 upper or lower triangle depending on the value of
9850                 IsUpper. Array with elements [0..N-1, 0..N-1].
9851     N       -   size of matrix A.
9852     IsUpper -   storage format.
9853 
9854 Result:
9855     1/LowerBound(cond(A)), if matrix A is positive definite,
9856    -1, if matrix A is not positive definite, and its condition number
9857     could not be found by this algorithm.
9858 
9859 NOTE:
9860     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9861     0.0 is returned in such cases.
9862 *************************************************************************/
spdmatrixrcond(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)9863 double spdmatrixrcond(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
9864 {
9865     jmp_buf _break_jump;
9866     alglib_impl::ae_state _alglib_env_state;
9867     alglib_impl::ae_state_init(&_alglib_env_state);
9868     if( setjmp(_break_jump) )
9869     {
9870 #if !defined(AE_NO_EXCEPTIONS)
9871         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9872 #else
9873         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9874         return 0;
9875 #endif
9876     }
9877     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9878     if( _xparams.flags!=0x0 )
9879         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9880     double result = alglib_impl::spdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
9881     alglib_impl::ae_state_clear(&_alglib_env_state);
9882     return *(reinterpret_cast<double*>(&result));
9883 }
9884 
9885 /*************************************************************************
9886 Triangular matrix: estimate of a condition number (1-norm)
9887 
9888 The algorithm calculates a lower bound of the condition number. In this case,
9889 the algorithm does not return a lower bound of the condition number, but an
9890 inverse number (to avoid an overflow in case of a singular matrix).
9891 
9892 Input parameters:
9893     A       -   matrix. Array[0..N-1, 0..N-1].
9894     N       -   size of A.
9895     IsUpper -   True, if the matrix is upper triangular.
9896     IsUnit  -   True, if the matrix has a unit diagonal.
9897 
9898 Result: 1/LowerBound(cond(A))
9899 
9900 NOTE:
9901     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9902     0.0 is returned in such cases.
9903 *************************************************************************/
rmatrixtrrcond1(const real_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)9904 double rmatrixtrrcond1(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
9905 {
9906     jmp_buf _break_jump;
9907     alglib_impl::ae_state _alglib_env_state;
9908     alglib_impl::ae_state_init(&_alglib_env_state);
9909     if( setjmp(_break_jump) )
9910     {
9911 #if !defined(AE_NO_EXCEPTIONS)
9912         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9913 #else
9914         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9915         return 0;
9916 #endif
9917     }
9918     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9919     if( _xparams.flags!=0x0 )
9920         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9921     double result = alglib_impl::rmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
9922     alglib_impl::ae_state_clear(&_alglib_env_state);
9923     return *(reinterpret_cast<double*>(&result));
9924 }
9925 
9926 /*************************************************************************
9927 Triangular matrix: estimate of a matrix condition number (infinity-norm).
9928 
9929 The algorithm calculates a lower bound of the condition number. In this case,
9930 the algorithm does not return a lower bound of the condition number, but an
9931 inverse number (to avoid an overflow in case of a singular matrix).
9932 
9933 Input parameters:
9934     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
9935     N   -   size of matrix A.
9936     IsUpper -   True, if the matrix is upper triangular.
9937     IsUnit  -   True, if the matrix has a unit diagonal.
9938 
9939 Result: 1/LowerBound(cond(A))
9940 
9941 NOTE:
9942     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9943     0.0 is returned in such cases.
9944 *************************************************************************/
rmatrixtrrcondinf(const real_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)9945 double rmatrixtrrcondinf(const real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
9946 {
9947     jmp_buf _break_jump;
9948     alglib_impl::ae_state _alglib_env_state;
9949     alglib_impl::ae_state_init(&_alglib_env_state);
9950     if( setjmp(_break_jump) )
9951     {
9952 #if !defined(AE_NO_EXCEPTIONS)
9953         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9954 #else
9955         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9956         return 0;
9957 #endif
9958     }
9959     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9960     if( _xparams.flags!=0x0 )
9961         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9962     double result = alglib_impl::rmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
9963     alglib_impl::ae_state_clear(&_alglib_env_state);
9964     return *(reinterpret_cast<double*>(&result));
9965 }
9966 
9967 /*************************************************************************
9968 Condition number estimate of a Hermitian positive definite matrix.
9969 
9970 The algorithm calculates a lower bound of the condition number. In this case,
9971 the algorithm does not return a lower bound of the condition number, but an
9972 inverse number (to avoid an overflow in case of a singular matrix).
9973 
9974 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
9975 matrices are equal, so the algorithm doesn't take into account the
9976 differences between these types of norms.
9977 
9978 Input parameters:
9979     A       -   Hermitian positive definite matrix which is given by its
9980                 upper or lower triangle depending on the value of
9981                 IsUpper. Array with elements [0..N-1, 0..N-1].
9982     N       -   size of matrix A.
9983     IsUpper -   storage format.
9984 
9985 Result:
9986     1/LowerBound(cond(A)), if matrix A is positive definite,
9987    -1, if matrix A is not positive definite, and its condition number
9988     could not be found by this algorithm.
9989 
9990 NOTE:
9991     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
9992     0.0 is returned in such cases.
9993 *************************************************************************/
hpdmatrixrcond(const complex_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)9994 double hpdmatrixrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
9995 {
9996     jmp_buf _break_jump;
9997     alglib_impl::ae_state _alglib_env_state;
9998     alglib_impl::ae_state_init(&_alglib_env_state);
9999     if( setjmp(_break_jump) )
10000     {
10001 #if !defined(AE_NO_EXCEPTIONS)
10002         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10003 #else
10004         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10005         return 0;
10006 #endif
10007     }
10008     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10009     if( _xparams.flags!=0x0 )
10010         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10011     double result = alglib_impl::hpdmatrixrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
10012     alglib_impl::ae_state_clear(&_alglib_env_state);
10013     return *(reinterpret_cast<double*>(&result));
10014 }
10015 
10016 /*************************************************************************
10017 Estimate of a matrix condition number (1-norm)
10018 
10019 The algorithm calculates a lower bound of the condition number. In this case,
10020 the algorithm does not return a lower bound of the condition number, but an
10021 inverse number (to avoid an overflow in case of a singular matrix).
10022 
10023 Input parameters:
10024     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
10025     N   -   size of matrix A.
10026 
10027 Result: 1/LowerBound(cond(A))
10028 
10029 NOTE:
10030     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10031     0.0 is returned in such cases.
10032 *************************************************************************/
cmatrixrcond1(const complex_2d_array & a,const ae_int_t n,const xparams _xparams)10033 double cmatrixrcond1(const complex_2d_array &a, const ae_int_t n, const xparams _xparams)
10034 {
10035     jmp_buf _break_jump;
10036     alglib_impl::ae_state _alglib_env_state;
10037     alglib_impl::ae_state_init(&_alglib_env_state);
10038     if( setjmp(_break_jump) )
10039     {
10040 #if !defined(AE_NO_EXCEPTIONS)
10041         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10042 #else
10043         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10044         return 0;
10045 #endif
10046     }
10047     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10048     if( _xparams.flags!=0x0 )
10049         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10050     double result = alglib_impl::cmatrixrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
10051     alglib_impl::ae_state_clear(&_alglib_env_state);
10052     return *(reinterpret_cast<double*>(&result));
10053 }
10054 
10055 /*************************************************************************
10056 Estimate of a matrix condition number (infinity-norm).
10057 
10058 The algorithm calculates a lower bound of the condition number. In this case,
10059 the algorithm does not return a lower bound of the condition number, but an
10060 inverse number (to avoid an overflow in case of a singular matrix).
10061 
10062 Input parameters:
10063     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
10064     N   -   size of matrix A.
10065 
10066 Result: 1/LowerBound(cond(A))
10067 
10068 NOTE:
10069     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10070     0.0 is returned in such cases.
10071 *************************************************************************/
cmatrixrcondinf(const complex_2d_array & a,const ae_int_t n,const xparams _xparams)10072 double cmatrixrcondinf(const complex_2d_array &a, const ae_int_t n, const xparams _xparams)
10073 {
10074     jmp_buf _break_jump;
10075     alglib_impl::ae_state _alglib_env_state;
10076     alglib_impl::ae_state_init(&_alglib_env_state);
10077     if( setjmp(_break_jump) )
10078     {
10079 #if !defined(AE_NO_EXCEPTIONS)
10080         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10081 #else
10082         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10083         return 0;
10084 #endif
10085     }
10086     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10087     if( _xparams.flags!=0x0 )
10088         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10089     double result = alglib_impl::cmatrixrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
10090     alglib_impl::ae_state_clear(&_alglib_env_state);
10091     return *(reinterpret_cast<double*>(&result));
10092 }
10093 
10094 /*************************************************************************
10095 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
10096 
10097 The algorithm calculates a lower bound of the condition number. In this case,
10098 the algorithm does not return a lower bound of the condition number, but an
10099 inverse number (to avoid an overflow in case of a singular matrix).
10100 
10101 Input parameters:
10102     LUA         -   LU decomposition of a matrix in compact form. Output of
10103                     the RMatrixLU subroutine.
10104     N           -   size of matrix A.
10105 
10106 Result: 1/LowerBound(cond(A))
10107 
10108 NOTE:
10109     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10110     0.0 is returned in such cases.
10111 *************************************************************************/
rmatrixlurcond1(const real_2d_array & lua,const ae_int_t n,const xparams _xparams)10112 double rmatrixlurcond1(const real_2d_array &lua, const ae_int_t n, const xparams _xparams)
10113 {
10114     jmp_buf _break_jump;
10115     alglib_impl::ae_state _alglib_env_state;
10116     alglib_impl::ae_state_init(&_alglib_env_state);
10117     if( setjmp(_break_jump) )
10118     {
10119 #if !defined(AE_NO_EXCEPTIONS)
10120         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10121 #else
10122         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10123         return 0;
10124 #endif
10125     }
10126     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10127     if( _xparams.flags!=0x0 )
10128         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10129     double result = alglib_impl::rmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10130     alglib_impl::ae_state_clear(&_alglib_env_state);
10131     return *(reinterpret_cast<double*>(&result));
10132 }
10133 
10134 /*************************************************************************
10135 Estimate of the condition number of a matrix given by its LU decomposition
10136 (infinity norm).
10137 
10138 The algorithm calculates a lower bound of the condition number. In this case,
10139 the algorithm does not return a lower bound of the condition number, but an
10140 inverse number (to avoid an overflow in case of a singular matrix).
10141 
10142 Input parameters:
10143     LUA     -   LU decomposition of a matrix in compact form. Output of
10144                 the RMatrixLU subroutine.
10145     N       -   size of matrix A.
10146 
10147 Result: 1/LowerBound(cond(A))
10148 
10149 NOTE:
10150     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10151     0.0 is returned in such cases.
10152 *************************************************************************/
rmatrixlurcondinf(const real_2d_array & lua,const ae_int_t n,const xparams _xparams)10153 double rmatrixlurcondinf(const real_2d_array &lua, const ae_int_t n, const xparams _xparams)
10154 {
10155     jmp_buf _break_jump;
10156     alglib_impl::ae_state _alglib_env_state;
10157     alglib_impl::ae_state_init(&_alglib_env_state);
10158     if( setjmp(_break_jump) )
10159     {
10160 #if !defined(AE_NO_EXCEPTIONS)
10161         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10162 #else
10163         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10164         return 0;
10165 #endif
10166     }
10167     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10168     if( _xparams.flags!=0x0 )
10169         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10170     double result = alglib_impl::rmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10171     alglib_impl::ae_state_clear(&_alglib_env_state);
10172     return *(reinterpret_cast<double*>(&result));
10173 }
10174 
10175 /*************************************************************************
10176 Condition number estimate of a symmetric positive definite matrix given by
10177 Cholesky decomposition.
10178 
10179 The algorithm calculates a lower bound of the condition number. In this
10180 case, the algorithm does not return a lower bound of the condition number,
10181 but an inverse number (to avoid an overflow in case of a singular matrix).
10182 
10183 It should be noted that 1-norm and inf-norm condition numbers of symmetric
10184 matrices are equal, so the algorithm doesn't take into account the
10185 differences between these types of norms.
10186 
10187 Input parameters:
10188     CD  - Cholesky decomposition of matrix A,
10189           output of SMatrixCholesky subroutine.
10190     N   - size of matrix A.
10191 
10192 Result: 1/LowerBound(cond(A))
10193 
10194 NOTE:
10195     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10196     0.0 is returned in such cases.
10197 *************************************************************************/
spdmatrixcholeskyrcond(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)10198 double spdmatrixcholeskyrcond(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
10199 {
10200     jmp_buf _break_jump;
10201     alglib_impl::ae_state _alglib_env_state;
10202     alglib_impl::ae_state_init(&_alglib_env_state);
10203     if( setjmp(_break_jump) )
10204     {
10205 #if !defined(AE_NO_EXCEPTIONS)
10206         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10207 #else
10208         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10209         return 0;
10210 #endif
10211     }
10212     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10213     if( _xparams.flags!=0x0 )
10214         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10215     double result = alglib_impl::spdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
10216     alglib_impl::ae_state_clear(&_alglib_env_state);
10217     return *(reinterpret_cast<double*>(&result));
10218 }
10219 
10220 /*************************************************************************
10221 Condition number estimate of a Hermitian positive definite matrix given by
10222 Cholesky decomposition.
10223 
10224 The algorithm calculates a lower bound of the condition number. In this
10225 case, the algorithm does not return a lower bound of the condition number,
10226 but an inverse number (to avoid an overflow in case of a singular matrix).
10227 
10228 It should be noted that 1-norm and inf-norm condition numbers of symmetric
10229 matrices are equal, so the algorithm doesn't take into account the
10230 differences between these types of norms.
10231 
10232 Input parameters:
    CD  - Cholesky decomposition of matrix A,
          output of HPDMatrixCholesky subroutine.
10235     N   - size of matrix A.
10236 
10237 Result: 1/LowerBound(cond(A))
10238 
10239 NOTE:
10240     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10241     0.0 is returned in such cases.
10242 *************************************************************************/
hpdmatrixcholeskyrcond(const complex_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)10243 double hpdmatrixcholeskyrcond(const complex_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
10244 {
10245     jmp_buf _break_jump;
10246     alglib_impl::ae_state _alglib_env_state;
10247     alglib_impl::ae_state_init(&_alglib_env_state);
10248     if( setjmp(_break_jump) )
10249     {
10250 #if !defined(AE_NO_EXCEPTIONS)
10251         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10252 #else
10253         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10254         return 0;
10255 #endif
10256     }
10257     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10258     if( _xparams.flags!=0x0 )
10259         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10260     double result = alglib_impl::hpdmatrixcholeskyrcond(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
10261     alglib_impl::ae_state_clear(&_alglib_env_state);
10262     return *(reinterpret_cast<double*>(&result));
10263 }
10264 
10265 /*************************************************************************
10266 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
10267 
10268 The algorithm calculates a lower bound of the condition number. In this case,
10269 the algorithm does not return a lower bound of the condition number, but an
10270 inverse number (to avoid an overflow in case of a singular matrix).
10271 
10272 Input parameters:
10273     LUA         -   LU decomposition of a matrix in compact form. Output of
10274                     the CMatrixLU subroutine.
10275     N           -   size of matrix A.
10276 
10277 Result: 1/LowerBound(cond(A))
10278 
10279 NOTE:
10280     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10281     0.0 is returned in such cases.
10282 *************************************************************************/
cmatrixlurcond1(const complex_2d_array & lua,const ae_int_t n,const xparams _xparams)10283 double cmatrixlurcond1(const complex_2d_array &lua, const ae_int_t n, const xparams _xparams)
10284 {
10285     jmp_buf _break_jump;
10286     alglib_impl::ae_state _alglib_env_state;
10287     alglib_impl::ae_state_init(&_alglib_env_state);
10288     if( setjmp(_break_jump) )
10289     {
10290 #if !defined(AE_NO_EXCEPTIONS)
10291         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10292 #else
10293         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10294         return 0;
10295 #endif
10296     }
10297     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10298     if( _xparams.flags!=0x0 )
10299         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10300     double result = alglib_impl::cmatrixlurcond1(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10301     alglib_impl::ae_state_clear(&_alglib_env_state);
10302     return *(reinterpret_cast<double*>(&result));
10303 }
10304 
10305 /*************************************************************************
10306 Estimate of the condition number of a matrix given by its LU decomposition
10307 (infinity norm).
10308 
10309 The algorithm calculates a lower bound of the condition number. In this case,
10310 the algorithm does not return a lower bound of the condition number, but an
10311 inverse number (to avoid an overflow in case of a singular matrix).
10312 
10313 Input parameters:
10314     LUA     -   LU decomposition of a matrix in compact form. Output of
10315                 the CMatrixLU subroutine.
10316     N       -   size of matrix A.
10317 
10318 Result: 1/LowerBound(cond(A))
10319 
10320 NOTE:
10321     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10322     0.0 is returned in such cases.
10323 *************************************************************************/
cmatrixlurcondinf(const complex_2d_array & lua,const ae_int_t n,const xparams _xparams)10324 double cmatrixlurcondinf(const complex_2d_array &lua, const ae_int_t n, const xparams _xparams)
10325 {
10326     jmp_buf _break_jump;
10327     alglib_impl::ae_state _alglib_env_state;
10328     alglib_impl::ae_state_init(&_alglib_env_state);
10329     if( setjmp(_break_jump) )
10330     {
10331 #if !defined(AE_NO_EXCEPTIONS)
10332         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10333 #else
10334         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10335         return 0;
10336 #endif
10337     }
10338     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10339     if( _xparams.flags!=0x0 )
10340         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10341     double result = alglib_impl::cmatrixlurcondinf(const_cast<alglib_impl::ae_matrix*>(lua.c_ptr()), n, &_alglib_env_state);
10342     alglib_impl::ae_state_clear(&_alglib_env_state);
10343     return *(reinterpret_cast<double*>(&result));
10344 }
10345 
10346 /*************************************************************************
10347 Triangular matrix: estimate of a condition number (1-norm)
10348 
10349 The algorithm calculates a lower bound of the condition number. In this case,
10350 the algorithm does not return a lower bound of the condition number, but an
10351 inverse number (to avoid an overflow in case of a singular matrix).
10352 
10353 Input parameters:
10354     A       -   matrix. Array[0..N-1, 0..N-1].
10355     N       -   size of A.
10356     IsUpper -   True, if the matrix is upper triangular.
10357     IsUnit  -   True, if the matrix has a unit diagonal.
10358 
10359 Result: 1/LowerBound(cond(A))
10360 
10361 NOTE:
10362     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10363     0.0 is returned in such cases.
10364 *************************************************************************/
cmatrixtrrcond1(const complex_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)10365 double cmatrixtrrcond1(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
10366 {
10367     jmp_buf _break_jump;
10368     alglib_impl::ae_state _alglib_env_state;
10369     alglib_impl::ae_state_init(&_alglib_env_state);
10370     if( setjmp(_break_jump) )
10371     {
10372 #if !defined(AE_NO_EXCEPTIONS)
10373         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10374 #else
10375         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10376         return 0;
10377 #endif
10378     }
10379     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10380     if( _xparams.flags!=0x0 )
10381         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10382     double result = alglib_impl::cmatrixtrrcond1(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
10383     alglib_impl::ae_state_clear(&_alglib_env_state);
10384     return *(reinterpret_cast<double*>(&result));
10385 }
10386 
10387 /*************************************************************************
10388 Triangular matrix: estimate of a matrix condition number (infinity-norm).
10389 
10390 The algorithm calculates a lower bound of the condition number. In this case,
10391 the algorithm does not return a lower bound of the condition number, but an
10392 inverse number (to avoid an overflow in case of a singular matrix).
10393 
10394 Input parameters:
10395     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
10396     N   -   size of matrix A.
10397     IsUpper -   True, if the matrix is upper triangular.
10398     IsUnit  -   True, if the matrix has a unit diagonal.
10399 
10400 Result: 1/LowerBound(cond(A))
10401 
10402 NOTE:
10403     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
10404     0.0 is returned in such cases.
10405 *************************************************************************/
cmatrixtrrcondinf(const complex_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,const xparams _xparams)10406 double cmatrixtrrcondinf(const complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, const xparams _xparams)
10407 {
10408     jmp_buf _break_jump;
10409     alglib_impl::ae_state _alglib_env_state;
10410     alglib_impl::ae_state_init(&_alglib_env_state);
10411     if( setjmp(_break_jump) )
10412     {
10413 #if !defined(AE_NO_EXCEPTIONS)
10414         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10415 #else
10416         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10417         return 0;
10418 #endif
10419     }
10420     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10421     if( _xparams.flags!=0x0 )
10422         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10423     double result = alglib_impl::cmatrixtrrcondinf(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &_alglib_env_state);
10424     alglib_impl::ae_state_clear(&_alglib_env_state);
10425     return *(reinterpret_cast<double*>(&result));
10426 }
10427 #endif
10428 
10429 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
10430 
10431 #endif
10432 
10433 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
10434 /*************************************************************************
10435 This object stores state of the iterative norm estimation algorithm.
10436 
10437 You should use ALGLIB functions to work with this object.
10438 *************************************************************************/
_normestimatorstate_owner::_normestimatorstate_owner()
{
    // Default constructor: allocates and zero-initializes the wrapped
    // alglib_impl::normestimatorstate structure.
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: the ALGLIB core longjmp()s here if allocation or
        // initialization fails; free any partially constructed state.
        if( p_struct!=NULL )
        {
            alglib_impl::_normestimatorstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // Exception-free build: record the error and leave the object empty.
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;   // must be NULL before ae_malloc so the error path above is safe
    p_struct = (alglib_impl::normestimatorstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::normestimatorstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::normestimatorstate));
    alglib_impl::_normestimatorstate_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
10467 
_normestimatorstate_owner::_normestimatorstate_owner(const _normestimatorstate_owner &rhs)
{
    // Copy constructor: deep-copies the wrapped implementation structure
    // from rhs (rhs must already be initialized).
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: the ALGLIB core longjmp()s here on failure;
        // free any partially constructed state.
        if( p_struct!=NULL )
        {
            alglib_impl::_normestimatorstate_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // Exception-free build: record the error and leave the object empty.
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;   // must be NULL before the assert/alloc so the error path above is safe
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: normestimatorstate copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::normestimatorstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::normestimatorstate), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::normestimatorstate));
    alglib_impl::_normestimatorstate_init_copy(p_struct, const_cast<alglib_impl::normestimatorstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
10497 
/*************************************************************************
Assignment: destroys the current C-core contents in place and deep-copies RHS
into the same allocation.
*************************************************************************/
_normestimatorstate_owner& _normestimatorstate_owner::operator=(const _normestimatorstate_owner &rhs)
{
    // Self-assignment is a no-op.
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // Recovery point for longjmp()-style error reporting from the C core.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: normestimatorstate assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: normestimatorstate assignment constructor failure (source is not initialized)", &_state);
    // Destroy-then-copy in place: the allocation made at construction time is reused.
    alglib_impl::_normestimatorstate_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::normestimatorstate));
    // NOTE(review): if _init_copy longjmp()s here, p_struct is left pointing at a
    // zeroed (destroyed) struct which the destructor will destroy again -- verify
    // that _normestimatorstate_destroy tolerates a zero-initialized struct.
    alglib_impl::_normestimatorstate_init_copy(p_struct, const_cast<alglib_impl::normestimatorstate*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
10524 
~_normestimatorstate_owner()10525 _normestimatorstate_owner::~_normestimatorstate_owner()
10526 {
10527     if( p_struct!=NULL )
10528     {
10529         alglib_impl::_normestimatorstate_destroy(p_struct);
10530         ae_free(p_struct);
10531     }
10532 }
10533 
// Returns the underlying C-core structure so wrapper functions can pass it
// to the alglib_impl layer. May be NULL if construction failed in
// AE_NO_EXCEPTIONS mode.
alglib_impl::normestimatorstate* _normestimatorstate_owner::c_ptr()
{
    return p_struct;
}
10538 
c_ptr() const10539 alglib_impl::normestimatorstate* _normestimatorstate_owner::c_ptr() const
10540 {
10541     return const_cast<alglib_impl::normestimatorstate*>(p_struct);
10542 }
// Default constructor: delegates to the owner base, which allocates and
// initializes the underlying C-core structure.
normestimatorstate::normestimatorstate() : _normestimatorstate_owner()
{
}
10546 
// Copy constructor: the owner base performs the deep copy of the C-core state.
normestimatorstate::normestimatorstate(const normestimatorstate &rhs):_normestimatorstate_owner(rhs)
{
}
10550 
// Assignment: delegate the deep copy to the owner base, guarding against
// self-assignment.
normestimatorstate& normestimatorstate::operator=(const normestimatorstate &rhs)
{
    if( this!=&rhs )
        _normestimatorstate_owner::operator=(rhs);
    return *this;
}
10558 
// Destructor: the owner base releases the C-core structure.
normestimatorstate::~normestimatorstate()
{
}
10562 
10563 /*************************************************************************
10564 This procedure initializes matrix norm estimator.
10565 
10566 USAGE:
10567 1. User initializes algorithm state with NormEstimatorCreate() call
10568 2. User calls NormEstimatorEstimateSparse() (or NormEstimatorIteration())
10569 3. User calls NormEstimatorResults() to get solution.
10570 
10571 INPUT PARAMETERS:
10572     M       -   number of rows in the matrix being estimated, M>0
10573     N       -   number of columns in the matrix being estimated, N>0
10574     NStart  -   number of random starting vectors
10575                 recommended value - at least 5.
10576     NIts    -   number of iterations to do with best starting vector
10577                 recommended value - at least 5.
10578 
10579 OUTPUT PARAMETERS:
10580     State   -   structure which stores algorithm state
10581 
10582 
10583 NOTE: this algorithm is effectively deterministic, i.e. it always  returns
10584 same result when repeatedly called for the same matrix. In fact, algorithm
10585 uses randomized starting vectors, but internal  random  numbers  generator
always generates the same sequence of random values (this is a feature, not
a bug).
10588 
10589 Algorithm can be made non-deterministic with NormEstimatorSetSeed(0) call.
10590 
10591   -- ALGLIB --
10592      Copyright 06.12.2011 by Bochkanov Sergey
10593 *************************************************************************/
normestimatorcreate(const ae_int_t m,const ae_int_t n,const ae_int_t nstart,const ae_int_t nits,normestimatorstate & state,const xparams _xparams)10594 void normestimatorcreate(const ae_int_t m, const ae_int_t n, const ae_int_t nstart, const ae_int_t nits, normestimatorstate &state, const xparams _xparams)
10595 {
10596     jmp_buf _break_jump;
10597     alglib_impl::ae_state _alglib_env_state;
10598     alglib_impl::ae_state_init(&_alglib_env_state);
10599     if( setjmp(_break_jump) )
10600     {
10601 #if !defined(AE_NO_EXCEPTIONS)
10602         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10603 #else
10604         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10605         return;
10606 #endif
10607     }
10608     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10609     if( _xparams.flags!=0x0 )
10610         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10611     alglib_impl::normestimatorcreate(m, n, nstart, nits, const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), &_alglib_env_state);
10612     alglib_impl::ae_state_clear(&_alglib_env_state);
10613     return;
10614 }
10615 
10616 /*************************************************************************
10617 This function changes seed value used by algorithm. In some cases we  need
10618 deterministic processing, i.e. subsequent calls must return equal results,
10619 in other cases we need non-deterministic algorithm which returns different
10620 results for the same matrix on every pass.
10621 
10622 Setting zero seed will lead to non-deterministic algorithm, while non-zero
10623 value will make our algorithm deterministic.
10624 
10625 INPUT PARAMETERS:
10626     State       -   norm estimator state, must be initialized with a  call
10627                     to NormEstimatorCreate()
10628     SeedVal     -   seed value, >=0. Zero value = non-deterministic algo.
10629 
10630   -- ALGLIB --
10631      Copyright 06.12.2011 by Bochkanov Sergey
10632 *************************************************************************/
normestimatorsetseed(const normestimatorstate & state,const ae_int_t seedval,const xparams _xparams)10633 void normestimatorsetseed(const normestimatorstate &state, const ae_int_t seedval, const xparams _xparams)
10634 {
10635     jmp_buf _break_jump;
10636     alglib_impl::ae_state _alglib_env_state;
10637     alglib_impl::ae_state_init(&_alglib_env_state);
10638     if( setjmp(_break_jump) )
10639     {
10640 #if !defined(AE_NO_EXCEPTIONS)
10641         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10642 #else
10643         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10644         return;
10645 #endif
10646     }
10647     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10648     if( _xparams.flags!=0x0 )
10649         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10650     alglib_impl::normestimatorsetseed(const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), seedval, &_alglib_env_state);
10651     alglib_impl::ae_state_clear(&_alglib_env_state);
10652     return;
10653 }
10654 
10655 /*************************************************************************
10656 This function estimates norm of the sparse M*N matrix A.
10657 
10658 INPUT PARAMETERS:
10659     State       -   norm estimator state, must be initialized with a  call
10660                     to NormEstimatorCreate()
10661     A           -   sparse M*N matrix, must be converted to CRS format
10662                     prior to calling this function.
10663 
10664 After this function  is  over  you can call NormEstimatorResults() to get
10665 estimate of the norm(A).
10666 
10667   -- ALGLIB --
10668      Copyright 06.12.2011 by Bochkanov Sergey
10669 *************************************************************************/
normestimatorestimatesparse(const normestimatorstate & state,const sparsematrix & a,const xparams _xparams)10670 void normestimatorestimatesparse(const normestimatorstate &state, const sparsematrix &a, const xparams _xparams)
10671 {
10672     jmp_buf _break_jump;
10673     alglib_impl::ae_state _alglib_env_state;
10674     alglib_impl::ae_state_init(&_alglib_env_state);
10675     if( setjmp(_break_jump) )
10676     {
10677 #if !defined(AE_NO_EXCEPTIONS)
10678         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10679 #else
10680         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10681         return;
10682 #endif
10683     }
10684     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10685     if( _xparams.flags!=0x0 )
10686         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10687     alglib_impl::normestimatorestimatesparse(const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), const_cast<alglib_impl::sparsematrix*>(a.c_ptr()), &_alglib_env_state);
10688     alglib_impl::ae_state_clear(&_alglib_env_state);
10689     return;
10690 }
10691 
10692 /*************************************************************************
10693 Matrix norm estimation results
10694 
10695 INPUT PARAMETERS:
10696     State   -   algorithm state
10697 
10698 OUTPUT PARAMETERS:
10699     Nrm     -   estimate of the matrix norm, Nrm>=0
10700 
10701   -- ALGLIB --
10702      Copyright 06.12.2011 by Bochkanov Sergey
10703 *************************************************************************/
normestimatorresults(const normestimatorstate & state,double & nrm,const xparams _xparams)10704 void normestimatorresults(const normestimatorstate &state, double &nrm, const xparams _xparams)
10705 {
10706     jmp_buf _break_jump;
10707     alglib_impl::ae_state _alglib_env_state;
10708     alglib_impl::ae_state_init(&_alglib_env_state);
10709     if( setjmp(_break_jump) )
10710     {
10711 #if !defined(AE_NO_EXCEPTIONS)
10712         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10713 #else
10714         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10715         return;
10716 #endif
10717     }
10718     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10719     if( _xparams.flags!=0x0 )
10720         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10721     alglib_impl::normestimatorresults(const_cast<alglib_impl::normestimatorstate*>(state.c_ptr()), &nrm, &_alglib_env_state);
10722     alglib_impl::ae_state_clear(&_alglib_env_state);
10723     return;
10724 }
10725 #endif
10726 
10727 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
10728 /*************************************************************************
10729 Matrix inverse report:
10730 * R1    reciprocal of condition number in 1-norm
10731 * RInf  reciprocal of condition number in inf-norm
10732 *************************************************************************/
/*************************************************************************
Default constructor: allocates and zero-initializes the C-core matinvreport.
*************************************************************************/
_matinvreport_owner::_matinvreport_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // The ALGLIB C core reports errors by longjmp()-ing back to this frame,
    // so the recovery point must be established before any work is done.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: release any partially-constructed object, then either
        // throw (default build) or set the error flag (AE_NO_EXCEPTIONS build).
        if( p_struct!=NULL )
        {
            alglib_impl::_matinvreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before allocation so the error path is safe if
    // ae_malloc() longjmp()s.
    p_struct = NULL;
    p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::matinvreport));
    alglib_impl::_matinvreport_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}
10761 
/*************************************************************************
Copy constructor: allocates a fresh C-core matinvreport and deep-copies the
contents of RHS into it.
*************************************************************************/
_matinvreport_owner::_matinvreport_owner(const _matinvreport_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // Recovery point for longjmp()-style error reporting from the C core.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // Error path: release the partially-constructed object before reporting.
        if( p_struct!=NULL )
        {
            alglib_impl::_matinvreport_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // p_struct must be NULL before allocation so the error path above is safe.
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: matinvreport copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::matinvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::matinvreport), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::matinvreport));
    alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}
10791 
/*************************************************************************
Assignment: destroys the current C-core contents in place and deep-copies RHS
into the same allocation.
*************************************************************************/
_matinvreport_owner& _matinvreport_owner::operator=(const _matinvreport_owner &rhs)
{
    // Self-assignment is a no-op.
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    // Recovery point for longjmp()-style error reporting from the C core.
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: matinvreport assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: matinvreport assignment constructor failure (source is not initialized)", &_state);
    // Destroy-then-copy in place: the allocation made at construction time is reused.
    alglib_impl::_matinvreport_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::matinvreport));
    // NOTE(review): if _init_copy longjmp()s here, p_struct points at a zeroed
    // (destroyed) struct which the destructor will destroy again -- verify the
    // core tolerates a zero-initialized struct.
    alglib_impl::_matinvreport_init_copy(p_struct, const_cast<alglib_impl::matinvreport*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}
10818 
~_matinvreport_owner()10819 _matinvreport_owner::~_matinvreport_owner()
10820 {
10821     if( p_struct!=NULL )
10822     {
10823         alglib_impl::_matinvreport_destroy(p_struct);
10824         ae_free(p_struct);
10825     }
10826 }
10827 
// Returns the underlying C-core structure so wrapper functions can pass it
// to the alglib_impl layer.
alglib_impl::matinvreport* _matinvreport_owner::c_ptr()
{
    return p_struct;
}
10832 
c_ptr() const10833 alglib_impl::matinvreport* _matinvreport_owner::c_ptr() const
10834 {
10835     return const_cast<alglib_impl::matinvreport*>(p_struct);
10836 }
// Default constructor. The owner base allocates the C-core matinvreport;
// the reference members r1/rinf alias fields of that structure so callers
// can read condition-number estimates directly.
// NOTE(review): in AE_NO_EXCEPTIONS builds a failed base construction leaves
// p_struct NULL, making p_struct->r1 here a null dereference -- confirm this
// matches upstream ALGLIB behavior.
matinvreport::matinvreport() : _matinvreport_owner() ,r1(p_struct->r1),rinf(p_struct->rinf)
{
}
10840 
// Copy constructor: the owner base deep-copies the C-core report; r1/rinf are
// rebound to the fields of this object's own copy.
matinvreport::matinvreport(const matinvreport &rhs):_matinvreport_owner(rhs) ,r1(p_struct->r1),rinf(p_struct->rinf)
{
}
10844 
// Assignment: delegate the deep copy to the owner base, guarding against
// self-assignment. The r1/rinf reference members keep aliasing this object's
// own C-core structure, whose contents are overwritten by the base.
matinvreport& matinvreport::operator=(const matinvreport &rhs)
{
    if( this!=&rhs )
        _matinvreport_owner::operator=(rhs);
    return *this;
}
10852 
// Destructor: the owner base releases the C-core structure.
matinvreport::~matinvreport()
{
}
10856 
10857 /*************************************************************************
10858 Inversion of a matrix given by its LU decomposition.
10859 
10860 INPUT PARAMETERS:
10861     A       -   LU decomposition of the matrix
10862                 (output of RMatrixLU subroutine).
10863     Pivots  -   table of permutations
10864                 (the output of RMatrixLU subroutine).
10865     N       -   size of matrix A (optional) :
10866                 * if given, only principal NxN submatrix is processed  and
10867                   overwritten. other elements are unchanged.
10868                 * if not given,  size  is  automatically  determined  from
10869                   matrix size (A must be square matrix)
10870 
10871 OUTPUT PARAMETERS:
10872     Info    -   return code:
10873                 * -3    A is singular, or VERY close to singular.
10874                         it is filled by zeros in such cases.
10875                 *  1    task is solved (but matrix A may be ill-conditioned,
10876                         check R1/RInf parameters for condition numbers).
10877     Rep     -   solver report, see below for more info
10878     A       -   inverse of matrix A.
10879                 Array whose indexes range within [0..N-1, 0..N-1].
10880 
10881 SOLVER REPORT
10882 
10883 Subroutine sets following fields of the Rep structure:
10884 * R1        reciprocal of condition number: 1/cond(A), 1-norm.
10885 * RInf      reciprocal of condition number: 1/cond(A), inf-norm.
10886 
10887   ! FREE EDITION OF ALGLIB:
10888   !
10889   ! Free Edition of ALGLIB supports following important features for  this
10890   ! function:
10891   ! * C++ version: x64 SIMD support using C++ intrinsics
10892   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
10893   !
10894   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
10895   ! Reference Manual in order  to  find  out  how to activate SIMD support
10896   ! in ALGLIB.
10897 
10898   ! COMMERCIAL EDITION OF ALGLIB:
10899   !
10900   ! Commercial Edition of ALGLIB includes following important improvements
10901   ! of this function:
10902   ! * high-performance native backend with same C# interface (C# version)
10903   ! * multithreading support (C++ and C# versions)
10904   ! * hardware vendor (Intel) implementations of linear algebra primitives
10905   !   (C++ and C# versions, x86/x64 platform)
10906   !
10907   ! We recommend you to read 'Working with commercial version' section  of
10908   ! ALGLIB Reference Manual in order to find out how to  use  performance-
10909   ! related features provided by commercial edition of ALGLIB.
10910 
10911   -- ALGLIB routine --
10912      05.02.2010
10913      Bochkanov Sergey
10914 *************************************************************************/
rmatrixluinverse(real_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)10915 void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
10916 {
10917     jmp_buf _break_jump;
10918     alglib_impl::ae_state _alglib_env_state;
10919     alglib_impl::ae_state_init(&_alglib_env_state);
10920     if( setjmp(_break_jump) )
10921     {
10922 #if !defined(AE_NO_EXCEPTIONS)
10923         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10924 #else
10925         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10926         return;
10927 #endif
10928     }
10929     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10930     if( _xparams.flags!=0x0 )
10931         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10932     alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
10933     alglib_impl::ae_state_clear(&_alglib_env_state);
10934     return;
10935 }
10936 
10937 /*************************************************************************
10938 Inversion of a matrix given by its LU decomposition.
10939 
10940 INPUT PARAMETERS:
10941     A       -   LU decomposition of the matrix
10942                 (output of RMatrixLU subroutine).
10943     Pivots  -   table of permutations
10944                 (the output of RMatrixLU subroutine).
10945     N       -   size of matrix A (optional) :
10946                 * if given, only principal NxN submatrix is processed  and
10947                   overwritten. other elements are unchanged.
10948                 * if not given,  size  is  automatically  determined  from
10949                   matrix size (A must be square matrix)
10950 
10951 OUTPUT PARAMETERS:
10952     Info    -   return code:
10953                 * -3    A is singular, or VERY close to singular.
10954                         it is filled by zeros in such cases.
10955                 *  1    task is solved (but matrix A may be ill-conditioned,
10956                         check R1/RInf parameters for condition numbers).
10957     Rep     -   solver report, see below for more info
10958     A       -   inverse of matrix A.
10959                 Array whose indexes range within [0..N-1, 0..N-1].
10960 
10961 SOLVER REPORT
10962 
10963 Subroutine sets following fields of the Rep structure:
10964 * R1        reciprocal of condition number: 1/cond(A), 1-norm.
10965 * RInf      reciprocal of condition number: 1/cond(A), inf-norm.
10966 
10967   ! FREE EDITION OF ALGLIB:
10968   !
10969   ! Free Edition of ALGLIB supports following important features for  this
10970   ! function:
10971   ! * C++ version: x64 SIMD support using C++ intrinsics
10972   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
10973   !
10974   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
10975   ! Reference Manual in order  to  find  out  how to activate SIMD support
10976   ! in ALGLIB.
10977 
10978   ! COMMERCIAL EDITION OF ALGLIB:
10979   !
10980   ! Commercial Edition of ALGLIB includes following important improvements
10981   ! of this function:
10982   ! * high-performance native backend with same C# interface (C# version)
10983   ! * multithreading support (C++ and C# versions)
10984   ! * hardware vendor (Intel) implementations of linear algebra primitives
10985   !   (C++ and C# versions, x86/x64 platform)
10986   !
10987   ! We recommend you to read 'Working with commercial version' section  of
10988   ! ALGLIB Reference Manual in order to find out how to  use  performance-
10989   ! related features provided by commercial edition of ALGLIB.
10990 
10991   -- ALGLIB routine --
10992      05.02.2010
10993      Bochkanov Sergey
10994 *************************************************************************/
10995 #if !defined(AE_NO_EXCEPTIONS)
rmatrixluinverse(real_2d_array & a,const integer_1d_array & pivots,ae_int_t & info,matinvreport & rep,const xparams _xparams)10996 void rmatrixluinverse(real_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep, const xparams _xparams)
10997 {
10998     jmp_buf _break_jump;
10999     alglib_impl::ae_state _alglib_env_state;
11000     ae_int_t n;
11001     if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length()))
11002         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixluinverse': looks like one of arguments has wrong size");
11003     n = a.cols();
11004     alglib_impl::ae_state_init(&_alglib_env_state);
11005     if( setjmp(_break_jump) )
11006         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11007     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11008     if( _xparams.flags!=0x0 )
11009         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11010     alglib_impl::rmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11011 
11012     alglib_impl::ae_state_clear(&_alglib_env_state);
11013     return;
11014 }
11015 #endif
11016 
11017 /*************************************************************************
11018 Inversion of a general matrix.
11019 
11020 Input parameters:
11021     A       -   matrix.
11022     N       -   size of matrix A (optional) :
11023                 * if given, only principal NxN submatrix is processed  and
11024                   overwritten. other elements are unchanged.
11025                 * if not given,  size  is  automatically  determined  from
11026                   matrix size (A must be square matrix)
11027 
11028 Output parameters:
11029     Info    -   return code, same as in RMatrixLUInverse
11030     Rep     -   solver report, same as in RMatrixLUInverse
11031     A       -   inverse of matrix A, same as in RMatrixLUInverse
11032 
11033 Result:
11034     True, if the matrix is not singular.
11035     False, if the matrix is singular.
11036 
11037   ! FREE EDITION OF ALGLIB:
11038   !
11039   ! Free Edition of ALGLIB supports following important features for  this
11040   ! function:
11041   ! * C++ version: x64 SIMD support using C++ intrinsics
11042   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11043   !
11044   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11045   ! Reference Manual in order  to  find  out  how to activate SIMD support
11046   ! in ALGLIB.
11047 
11048   ! COMMERCIAL EDITION OF ALGLIB:
11049   !
11050   ! Commercial Edition of ALGLIB includes following important improvements
11051   ! of this function:
11052   ! * high-performance native backend with same C# interface (C# version)
11053   ! * multithreading support (C++ and C# versions)
11054   ! * hardware vendor (Intel) implementations of linear algebra primitives
11055   !   (C++ and C# versions, x86/x64 platform)
11056   !
11057   ! We recommend you to read 'Working with commercial version' section  of
11058   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11059   ! related features provided by commercial edition of ALGLIB.
11060 
11061   -- ALGLIB --
11062      Copyright 2005-2010 by Bochkanov Sergey
11063 *************************************************************************/
rmatrixinverse(real_2d_array & a,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)11064 void rmatrixinverse(real_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11065 {
11066     jmp_buf _break_jump;
11067     alglib_impl::ae_state _alglib_env_state;
11068     alglib_impl::ae_state_init(&_alglib_env_state);
11069     if( setjmp(_break_jump) )
11070     {
11071 #if !defined(AE_NO_EXCEPTIONS)
11072         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11073 #else
11074         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11075         return;
11076 #endif
11077     }
11078     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11079     if( _xparams.flags!=0x0 )
11080         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11081     alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11082     alglib_impl::ae_state_clear(&_alglib_env_state);
11083     return;
11084 }
11085 
11086 /*************************************************************************
11087 Inversion of a general matrix.
11088 
11089 Input parameters:
11090     A       -   matrix.
11091     N       -   size of matrix A (optional) :
11092                 * if given, only principal NxN submatrix is processed  and
11093                   overwritten. other elements are unchanged.
11094                 * if not given,  size  is  automatically  determined  from
11095                   matrix size (A must be square matrix)
11096 
11097 Output parameters:
11098     Info    -   return code, same as in RMatrixLUInverse
11099     Rep     -   solver report, same as in RMatrixLUInverse
11100     A       -   inverse of matrix A, same as in RMatrixLUInverse
11101 
11102 Result:
11103     True, if the matrix is not singular.
11104     False, if the matrix is singular.
11105 
11106   ! FREE EDITION OF ALGLIB:
11107   !
11108   ! Free Edition of ALGLIB supports following important features for  this
11109   ! function:
11110   ! * C++ version: x64 SIMD support using C++ intrinsics
11111   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11112   !
11113   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11114   ! Reference Manual in order  to  find  out  how to activate SIMD support
11115   ! in ALGLIB.
11116 
11117   ! COMMERCIAL EDITION OF ALGLIB:
11118   !
11119   ! Commercial Edition of ALGLIB includes following important improvements
11120   ! of this function:
11121   ! * high-performance native backend with same C# interface (C# version)
11122   ! * multithreading support (C++ and C# versions)
11123   ! * hardware vendor (Intel) implementations of linear algebra primitives
11124   !   (C++ and C# versions, x86/x64 platform)
11125   !
11126   ! We recommend you to read 'Working with commercial version' section  of
11127   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11128   ! related features provided by commercial edition of ALGLIB.
11129 
11130   -- ALGLIB --
11131      Copyright 2005-2010 by Bochkanov Sergey
11132 *************************************************************************/
11133 #if !defined(AE_NO_EXCEPTIONS)
rmatrixinverse(real_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11134 void rmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11135 {
11136     jmp_buf _break_jump;
11137     alglib_impl::ae_state _alglib_env_state;
11138     ae_int_t n;
11139     if( (a.cols()!=a.rows()))
11140         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixinverse': looks like one of arguments has wrong size");
11141     n = a.cols();
11142     alglib_impl::ae_state_init(&_alglib_env_state);
11143     if( setjmp(_break_jump) )
11144         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11145     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11146     if( _xparams.flags!=0x0 )
11147         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11148     alglib_impl::rmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11149 
11150     alglib_impl::ae_state_clear(&_alglib_env_state);
11151     return;
11152 }
11153 #endif
11154 
11155 /*************************************************************************
11156 Inversion of a matrix given by its LU decomposition.
11157 
11158 INPUT PARAMETERS:
11159     A       -   LU decomposition of the matrix
11160                 (output of CMatrixLU subroutine).
11161     Pivots  -   table of permutations
11162                 (the output of CMatrixLU subroutine).
11163     N       -   size of matrix A (optional) :
11164                 * if given, only principal NxN submatrix is processed  and
11165                   overwritten. other elements are unchanged.
11166                 * if not given,  size  is  automatically  determined  from
11167                   matrix size (A must be square matrix)
11168 
11169 OUTPUT PARAMETERS:
11170     Info    -   return code, same as in RMatrixLUInverse
11171     Rep     -   solver report, same as in RMatrixLUInverse
11172     A       -   inverse of matrix A, same as in RMatrixLUInverse
11173 
11174   ! FREE EDITION OF ALGLIB:
11175   !
11176   ! Free Edition of ALGLIB supports following important features for  this
11177   ! function:
11178   ! * C++ version: x64 SIMD support using C++ intrinsics
11179   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11180   !
11181   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11182   ! Reference Manual in order  to  find  out  how to activate SIMD support
11183   ! in ALGLIB.
11184 
11185   ! COMMERCIAL EDITION OF ALGLIB:
11186   !
11187   ! Commercial Edition of ALGLIB includes following important improvements
11188   ! of this function:
11189   ! * high-performance native backend with same C# interface (C# version)
11190   ! * multithreading support (C++ and C# versions)
11191   ! * hardware vendor (Intel) implementations of linear algebra primitives
11192   !   (C++ and C# versions, x86/x64 platform)
11193   !
11194   ! We recommend you to read 'Working with commercial version' section  of
11195   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11196   ! related features provided by commercial edition of ALGLIB.
11197 
11198   -- ALGLIB routine --
11199      05.02.2010
11200      Bochkanov Sergey
11201 *************************************************************************/
cmatrixluinverse(complex_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)11202 void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11203 {
11204     jmp_buf _break_jump;
11205     alglib_impl::ae_state _alglib_env_state;
11206     alglib_impl::ae_state_init(&_alglib_env_state);
11207     if( setjmp(_break_jump) )
11208     {
11209 #if !defined(AE_NO_EXCEPTIONS)
11210         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11211 #else
11212         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11213         return;
11214 #endif
11215     }
11216     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11217     if( _xparams.flags!=0x0 )
11218         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11219     alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11220     alglib_impl::ae_state_clear(&_alglib_env_state);
11221     return;
11222 }
11223 
11224 /*************************************************************************
11225 Inversion of a matrix given by its LU decomposition.
11226 
11227 INPUT PARAMETERS:
11228     A       -   LU decomposition of the matrix
11229                 (output of CMatrixLU subroutine).
11230     Pivots  -   table of permutations
11231                 (the output of CMatrixLU subroutine).
11232     N       -   size of matrix A (optional) :
11233                 * if given, only principal NxN submatrix is processed  and
11234                   overwritten. other elements are unchanged.
11235                 * if not given,  size  is  automatically  determined  from
11236                   matrix size (A must be square matrix)
11237 
11238 OUTPUT PARAMETERS:
11239     Info    -   return code, same as in RMatrixLUInverse
11240     Rep     -   solver report, same as in RMatrixLUInverse
11241     A       -   inverse of matrix A, same as in RMatrixLUInverse
11242 
11243   ! FREE EDITION OF ALGLIB:
11244   !
11245   ! Free Edition of ALGLIB supports following important features for  this
11246   ! function:
11247   ! * C++ version: x64 SIMD support using C++ intrinsics
11248   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11249   !
11250   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11251   ! Reference Manual in order  to  find  out  how to activate SIMD support
11252   ! in ALGLIB.
11253 
11254   ! COMMERCIAL EDITION OF ALGLIB:
11255   !
11256   ! Commercial Edition of ALGLIB includes following important improvements
11257   ! of this function:
11258   ! * high-performance native backend with same C# interface (C# version)
11259   ! * multithreading support (C++ and C# versions)
11260   ! * hardware vendor (Intel) implementations of linear algebra primitives
11261   !   (C++ and C# versions, x86/x64 platform)
11262   !
11263   ! We recommend you to read 'Working with commercial version' section  of
11264   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11265   ! related features provided by commercial edition of ALGLIB.
11266 
11267   -- ALGLIB routine --
11268      05.02.2010
11269      Bochkanov Sergey
11270 *************************************************************************/
11271 #if !defined(AE_NO_EXCEPTIONS)
cmatrixluinverse(complex_2d_array & a,const integer_1d_array & pivots,ae_int_t & info,matinvreport & rep,const xparams _xparams)11272 void cmatrixluinverse(complex_2d_array &a, const integer_1d_array &pivots, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11273 {
11274     jmp_buf _break_jump;
11275     alglib_impl::ae_state _alglib_env_state;
11276     ae_int_t n;
11277     if( (a.cols()!=a.rows()) || (a.cols()!=pivots.length()))
11278         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixluinverse': looks like one of arguments has wrong size");
11279     n = a.cols();
11280     alglib_impl::ae_state_init(&_alglib_env_state);
11281     if( setjmp(_break_jump) )
11282         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11283     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11284     if( _xparams.flags!=0x0 )
11285         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11286     alglib_impl::cmatrixluinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11287 
11288     alglib_impl::ae_state_clear(&_alglib_env_state);
11289     return;
11290 }
11291 #endif
11292 
11293 /*************************************************************************
11294 Inversion of a general matrix.
11295 
11296 Input parameters:
11297     A       -   matrix
11298     N       -   size of matrix A (optional) :
11299                 * if given, only principal NxN submatrix is processed  and
11300                   overwritten. other elements are unchanged.
11301                 * if not given,  size  is  automatically  determined  from
11302                   matrix size (A must be square matrix)
11303 
11304 Output parameters:
11305     Info    -   return code, same as in RMatrixLUInverse
11306     Rep     -   solver report, same as in RMatrixLUInverse
11307     A       -   inverse of matrix A, same as in RMatrixLUInverse
11308 
11309   ! FREE EDITION OF ALGLIB:
11310   !
11311   ! Free Edition of ALGLIB supports following important features for  this
11312   ! function:
11313   ! * C++ version: x64 SIMD support using C++ intrinsics
11314   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11315   !
11316   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11317   ! Reference Manual in order  to  find  out  how to activate SIMD support
11318   ! in ALGLIB.
11319 
11320   ! COMMERCIAL EDITION OF ALGLIB:
11321   !
11322   ! Commercial Edition of ALGLIB includes following important improvements
11323   ! of this function:
11324   ! * high-performance native backend with same C# interface (C# version)
11325   ! * multithreading support (C++ and C# versions)
11326   ! * hardware vendor (Intel) implementations of linear algebra primitives
11327   !   (C++ and C# versions, x86/x64 platform)
11328   !
11329   ! We recommend you to read 'Working with commercial version' section  of
11330   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11331   ! related features provided by commercial edition of ALGLIB.
11332 
11333   -- ALGLIB --
11334      Copyright 2005 by Bochkanov Sergey
11335 *************************************************************************/
cmatrixinverse(complex_2d_array & a,const ae_int_t n,ae_int_t & info,matinvreport & rep,const xparams _xparams)11336 void cmatrixinverse(complex_2d_array &a, const ae_int_t n, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11337 {
11338     jmp_buf _break_jump;
11339     alglib_impl::ae_state _alglib_env_state;
11340     alglib_impl::ae_state_init(&_alglib_env_state);
11341     if( setjmp(_break_jump) )
11342     {
11343 #if !defined(AE_NO_EXCEPTIONS)
11344         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11345 #else
11346         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11347         return;
11348 #endif
11349     }
11350     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11351     if( _xparams.flags!=0x0 )
11352         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11353     alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11354     alglib_impl::ae_state_clear(&_alglib_env_state);
11355     return;
11356 }
11357 
11358 /*************************************************************************
11359 Inversion of a general matrix.
11360 
11361 Input parameters:
11362     A       -   matrix
11363     N       -   size of matrix A (optional) :
11364                 * if given, only principal NxN submatrix is processed  and
11365                   overwritten. other elements are unchanged.
11366                 * if not given,  size  is  automatically  determined  from
11367                   matrix size (A must be square matrix)
11368 
11369 Output parameters:
11370     Info    -   return code, same as in RMatrixLUInverse
11371     Rep     -   solver report, same as in RMatrixLUInverse
11372     A       -   inverse of matrix A, same as in RMatrixLUInverse
11373 
11374   ! FREE EDITION OF ALGLIB:
11375   !
11376   ! Free Edition of ALGLIB supports following important features for  this
11377   ! function:
11378   ! * C++ version: x64 SIMD support using C++ intrinsics
11379   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11380   !
11381   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11382   ! Reference Manual in order  to  find  out  how to activate SIMD support
11383   ! in ALGLIB.
11384 
11385   ! COMMERCIAL EDITION OF ALGLIB:
11386   !
11387   ! Commercial Edition of ALGLIB includes following important improvements
11388   ! of this function:
11389   ! * high-performance native backend with same C# interface (C# version)
11390   ! * multithreading support (C++ and C# versions)
11391   ! * hardware vendor (Intel) implementations of linear algebra primitives
11392   !   (C++ and C# versions, x86/x64 platform)
11393   !
11394   ! We recommend you to read 'Working with commercial version' section  of
11395   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11396   ! related features provided by commercial edition of ALGLIB.
11397 
11398   -- ALGLIB --
11399      Copyright 2005 by Bochkanov Sergey
11400 *************************************************************************/
11401 #if !defined(AE_NO_EXCEPTIONS)
cmatrixinverse(complex_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11402 void cmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11403 {
11404     jmp_buf _break_jump;
11405     alglib_impl::ae_state _alglib_env_state;
11406     ae_int_t n;
11407     if( (a.cols()!=a.rows()))
11408         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixinverse': looks like one of arguments has wrong size");
11409     n = a.cols();
11410     alglib_impl::ae_state_init(&_alglib_env_state);
11411     if( setjmp(_break_jump) )
11412         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11413     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11414     if( _xparams.flags!=0x0 )
11415         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11416     alglib_impl::cmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11417 
11418     alglib_impl::ae_state_clear(&_alglib_env_state);
11419     return;
11420 }
11421 #endif
11422 
11423 /*************************************************************************
11424 Inversion of a symmetric positive definite matrix which is given
11425 by Cholesky decomposition.
11426 
11427 Input parameters:
11428     A       -   Cholesky decomposition of the matrix to be inverted:
11429                 A=U'*U or A = L*L'.
11430                 Output of  SPDMatrixCholesky subroutine.
11431     N       -   size of matrix A (optional) :
11432                 * if given, only principal NxN submatrix is processed  and
11433                   overwritten. other elements are unchanged.
11434                 * if not given,  size  is  automatically  determined  from
11435                   matrix size (A must be square matrix)
11436     IsUpper -   storage type (optional):
11437                 * if True, symmetric  matrix  A  is  given  by  its  upper
11438                   triangle, and the lower triangle isn't  used/changed  by
11439                   function
11440                 * if False,  symmetric matrix  A  is  given  by  its lower
11441                   triangle, and the  upper triangle isn't used/changed  by
11442                   function
11443                 * if not given, lower half is used.
11444 
11445 Output parameters:
11446     Info    -   return code, same as in RMatrixLUInverse
11447     Rep     -   solver report, same as in RMatrixLUInverse
11448     A       -   inverse of matrix A, same as in RMatrixLUInverse
11449 
11450   ! FREE EDITION OF ALGLIB:
11451   !
11452   ! Free Edition of ALGLIB supports following important features for  this
11453   ! function:
11454   ! * C++ version: x64 SIMD support using C++ intrinsics
11455   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11456   !
11457   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11458   ! Reference Manual in order  to  find  out  how to activate SIMD support
11459   ! in ALGLIB.
11460 
11461   ! COMMERCIAL EDITION OF ALGLIB:
11462   !
11463   ! Commercial Edition of ALGLIB includes following important improvements
11464   ! of this function:
11465   ! * high-performance native backend with same C# interface (C# version)
11466   ! * multithreading support (C++ and C# versions)
11467   ! * hardware vendor (Intel) implementations of linear algebra primitives
11468   !   (C++ and C# versions, x86/x64 platform)
11469   !
11470   ! We recommend you to read 'Working with commercial version' section  of
11471   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11472   ! related features provided by commercial edition of ALGLIB.
11473 
11474   -- ALGLIB routine --
11475      10.02.2010
11476      Bochkanov Sergey
11477 *************************************************************************/
spdmatrixcholeskyinverse(real_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11478 void spdmatrixcholeskyinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11479 {
11480     jmp_buf _break_jump;
11481     alglib_impl::ae_state _alglib_env_state;
11482     alglib_impl::ae_state_init(&_alglib_env_state);
11483     if( setjmp(_break_jump) )
11484     {
11485 #if !defined(AE_NO_EXCEPTIONS)
11486         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11487 #else
11488         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11489         return;
11490 #endif
11491     }
11492     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11493     if( _xparams.flags!=0x0 )
11494         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11495     alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11496     alglib_impl::ae_state_clear(&_alglib_env_state);
11497     return;
11498 }
11499 
11500 /*************************************************************************
11501 Inversion of a symmetric positive definite matrix which is given
11502 by Cholesky decomposition.
11503 
11504 Input parameters:
11505     A       -   Cholesky decomposition of the matrix to be inverted:
11506                 A=U'*U or A = L*L'.
11507                 Output of  SPDMatrixCholesky subroutine.
11508     N       -   size of matrix A (optional) :
11509                 * if given, only principal NxN submatrix is processed  and
11510                   overwritten. other elements are unchanged.
11511                 * if not given,  size  is  automatically  determined  from
11512                   matrix size (A must be square matrix)
11513     IsUpper -   storage type (optional):
11514                 * if True, symmetric  matrix  A  is  given  by  its  upper
11515                   triangle, and the lower triangle isn't  used/changed  by
11516                   function
11517                 * if False,  symmetric matrix  A  is  given  by  its lower
11518                   triangle, and the  upper triangle isn't used/changed  by
11519                   function
11520                 * if not given, lower half is used.
11521 
11522 Output parameters:
11523     Info    -   return code, same as in RMatrixLUInverse
11524     Rep     -   solver report, same as in RMatrixLUInverse
11525     A       -   inverse of matrix A, same as in RMatrixLUInverse
11526 
11527   ! FREE EDITION OF ALGLIB:
11528   !
11529   ! Free Edition of ALGLIB supports following important features for  this
11530   ! function:
11531   ! * C++ version: x64 SIMD support using C++ intrinsics
11532   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11533   !
11534   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11535   ! Reference Manual in order  to  find  out  how to activate SIMD support
11536   ! in ALGLIB.
11537 
11538   ! COMMERCIAL EDITION OF ALGLIB:
11539   !
11540   ! Commercial Edition of ALGLIB includes following important improvements
11541   ! of this function:
11542   ! * high-performance native backend with same C# interface (C# version)
11543   ! * multithreading support (C++ and C# versions)
11544   ! * hardware vendor (Intel) implementations of linear algebra primitives
11545   !   (C++ and C# versions, x86/x64 platform)
11546   !
11547   ! We recommend you to read 'Working with commercial version' section  of
11548   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11549   ! related features provided by commercial edition of ALGLIB.
11550 
11551   -- ALGLIB routine --
11552      10.02.2010
11553      Bochkanov Sergey
11554 *************************************************************************/
11555 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixcholeskyinverse(real_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11556 void spdmatrixcholeskyinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11557 {
11558     jmp_buf _break_jump;
11559     alglib_impl::ae_state _alglib_env_state;
11560     ae_int_t n;
11561     bool isupper;
11562     if( (a.cols()!=a.rows()))
11563         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixcholeskyinverse': looks like one of arguments has wrong size");
11564     n = a.cols();
11565     isupper = false;
11566     alglib_impl::ae_state_init(&_alglib_env_state);
11567     if( setjmp(_break_jump) )
11568         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11569     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11570     if( _xparams.flags!=0x0 )
11571         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11572     alglib_impl::spdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11573 
11574     alglib_impl::ae_state_clear(&_alglib_env_state);
11575     return;
11576 }
11577 #endif
11578 
11579 /*************************************************************************
11580 Inversion of a symmetric positive definite matrix.
11581 
11582 Given an upper or lower triangle of a symmetric positive definite matrix,
11583 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11584 depending on the input.
11585 
11586 Input parameters:
11587     A       -   matrix to be inverted (upper or lower triangle).
11588                 Array with elements [0..N-1,0..N-1].
11589     N       -   size of matrix A (optional) :
11590                 * if given, only principal NxN submatrix is processed  and
11591                   overwritten. other elements are unchanged.
11592                 * if not given,  size  is  automatically  determined  from
11593                   matrix size (A must be square matrix)
11594     IsUpper -   storage type (optional):
11595                 * if True, symmetric  matrix  A  is  given  by  its  upper
11596                   triangle, and the lower triangle isn't  used/changed  by
11597                   function
11598                 * if False,  symmetric matrix  A  is  given  by  its lower
11599                   triangle, and the  upper triangle isn't used/changed  by
11600                   function
11601                 * if not given,  both lower and upper  triangles  must  be
11602                   filled.
11603 
11604 Output parameters:
11605     Info    -   return code, same as in RMatrixLUInverse
11606     Rep     -   solver report, same as in RMatrixLUInverse
11607     A       -   inverse of matrix A, same as in RMatrixLUInverse
11608 
11609   ! FREE EDITION OF ALGLIB:
11610   !
11611   ! Free Edition of ALGLIB supports following important features for  this
11612   ! function:
11613   ! * C++ version: x64 SIMD support using C++ intrinsics
11614   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11615   !
11616   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11617   ! Reference Manual in order  to  find  out  how to activate SIMD support
11618   ! in ALGLIB.
11619 
11620   ! COMMERCIAL EDITION OF ALGLIB:
11621   !
11622   ! Commercial Edition of ALGLIB includes following important improvements
11623   ! of this function:
11624   ! * high-performance native backend with same C# interface (C# version)
11625   ! * multithreading support (C++ and C# versions)
11626   ! * hardware vendor (Intel) implementations of linear algebra primitives
11627   !   (C++ and C# versions, x86/x64 platform)
11628   !
11629   ! We recommend you to read 'Working with commercial version' section  of
11630   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11631   ! related features provided by commercial edition of ALGLIB.
11632 
11633   -- ALGLIB routine --
11634      10.02.2010
11635      Bochkanov Sergey
11636 *************************************************************************/
spdmatrixinverse(real_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11637 void spdmatrixinverse(real_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11638 {
11639     jmp_buf _break_jump;
11640     alglib_impl::ae_state _alglib_env_state;
11641     alglib_impl::ae_state_init(&_alglib_env_state);
11642     if( setjmp(_break_jump) )
11643     {
11644 #if !defined(AE_NO_EXCEPTIONS)
11645         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11646 #else
11647         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11648         return;
11649 #endif
11650     }
11651     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11652     if( _xparams.flags!=0x0 )
11653         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11654     alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11655     alglib_impl::ae_state_clear(&_alglib_env_state);
11656     return;
11657 }
11658 
11659 /*************************************************************************
11660 Inversion of a symmetric positive definite matrix.
11661 
11662 Given an upper or lower triangle of a symmetric positive definite matrix,
11663 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11664 depending on the input.
11665 
11666 Input parameters:
11667     A       -   matrix to be inverted (upper or lower triangle).
11668                 Array with elements [0..N-1,0..N-1].
11669     N       -   size of matrix A (optional) :
11670                 * if given, only principal NxN submatrix is processed  and
11671                   overwritten. other elements are unchanged.
11672                 * if not given,  size  is  automatically  determined  from
11673                   matrix size (A must be square matrix)
11674     IsUpper -   storage type (optional):
11675                 * if True, symmetric  matrix  A  is  given  by  its  upper
11676                   triangle, and the lower triangle isn't  used/changed  by
11677                   function
11678                 * if False,  symmetric matrix  A  is  given  by  its lower
11679                   triangle, and the  upper triangle isn't used/changed  by
11680                   function
11681                 * if not given,  both lower and upper  triangles  must  be
11682                   filled.
11683 
11684 Output parameters:
11685     Info    -   return code, same as in RMatrixLUInverse
11686     Rep     -   solver report, same as in RMatrixLUInverse
11687     A       -   inverse of matrix A, same as in RMatrixLUInverse
11688 
11689   ! FREE EDITION OF ALGLIB:
11690   !
11691   ! Free Edition of ALGLIB supports following important features for  this
11692   ! function:
11693   ! * C++ version: x64 SIMD support using C++ intrinsics
11694   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11695   !
11696   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11697   ! Reference Manual in order  to  find  out  how to activate SIMD support
11698   ! in ALGLIB.
11699 
11700   ! COMMERCIAL EDITION OF ALGLIB:
11701   !
11702   ! Commercial Edition of ALGLIB includes following important improvements
11703   ! of this function:
11704   ! * high-performance native backend with same C# interface (C# version)
11705   ! * multithreading support (C++ and C# versions)
11706   ! * hardware vendor (Intel) implementations of linear algebra primitives
11707   !   (C++ and C# versions, x86/x64 platform)
11708   !
11709   ! We recommend you to read 'Working with commercial version' section  of
11710   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11711   ! related features provided by commercial edition of ALGLIB.
11712 
11713   -- ALGLIB routine --
11714      10.02.2010
11715      Bochkanov Sergey
11716 *************************************************************************/
11717 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixinverse(real_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11718 void spdmatrixinverse(real_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11719 {
11720     jmp_buf _break_jump;
11721     alglib_impl::ae_state _alglib_env_state;
11722     ae_int_t n;
11723     bool isupper;
11724     if( (a.cols()!=a.rows()))
11725         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixinverse': looks like one of arguments has wrong size");
11726     if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
11727         _ALGLIB_CPP_EXCEPTION("'a' parameter is not symmetric matrix");
11728     n = a.cols();
11729     isupper = false;
11730     alglib_impl::ae_state_init(&_alglib_env_state);
11731     if( setjmp(_break_jump) )
11732         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11733     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11734     if( _xparams.flags!=0x0 )
11735         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11736     alglib_impl::spdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11737         if( !alglib_impl::ae_force_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
11738             _ALGLIB_CPP_EXCEPTION("Internal error while forcing symmetricity of 'a' parameter");
11739     alglib_impl::ae_state_clear(&_alglib_env_state);
11740     return;
11741 }
11742 #endif
11743 
11744 /*************************************************************************
11745 Inversion of a Hermitian positive definite matrix which is given
11746 by Cholesky decomposition.
11747 
11748 Input parameters:
11749     A       -   Cholesky decomposition of the matrix to be inverted:
11750                 A=U'*U or A = L*L'.
11751                 Output of  HPDMatrixCholesky subroutine.
11752     N       -   size of matrix A (optional) :
11753                 * if given, only principal NxN submatrix is processed  and
11754                   overwritten. other elements are unchanged.
11755                 * if not given,  size  is  automatically  determined  from
11756                   matrix size (A must be square matrix)
11757     IsUpper -   storage type (optional):
11758                 * if True, symmetric  matrix  A  is  given  by  its  upper
11759                   triangle, and the lower triangle isn't  used/changed  by
11760                   function
11761                 * if False,  symmetric matrix  A  is  given  by  its lower
11762                   triangle, and the  upper triangle isn't used/changed  by
11763                   function
11764                 * if not given, lower half is used.
11765 
11766 Output parameters:
11767     Info    -   return code, same as in RMatrixLUInverse
11768     Rep     -   solver report, same as in RMatrixLUInverse
11769     A       -   inverse of matrix A, same as in RMatrixLUInverse
11770 
11771   ! FREE EDITION OF ALGLIB:
11772   !
11773   ! Free Edition of ALGLIB supports following important features for  this
11774   ! function:
11775   ! * C++ version: x64 SIMD support using C++ intrinsics
11776   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11777   !
11778   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11779   ! Reference Manual in order  to  find  out  how to activate SIMD support
11780   ! in ALGLIB.
11781 
11782   ! COMMERCIAL EDITION OF ALGLIB:
11783   !
11784   ! Commercial Edition of ALGLIB includes following important improvements
11785   ! of this function:
11786   ! * high-performance native backend with same C# interface (C# version)
11787   ! * multithreading support (C++ and C# versions)
11788   ! * hardware vendor (Intel) implementations of linear algebra primitives
11789   !   (C++ and C# versions, x86/x64 platform)
11790   !
11791   ! We recommend you to read 'Working with commercial version' section  of
11792   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11793   ! related features provided by commercial edition of ALGLIB.
11794 
11795   -- ALGLIB routine --
11796      10.02.2010
11797      Bochkanov Sergey
11798 *************************************************************************/
hpdmatrixcholeskyinverse(complex_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11799 void hpdmatrixcholeskyinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11800 {
11801     jmp_buf _break_jump;
11802     alglib_impl::ae_state _alglib_env_state;
11803     alglib_impl::ae_state_init(&_alglib_env_state);
11804     if( setjmp(_break_jump) )
11805     {
11806 #if !defined(AE_NO_EXCEPTIONS)
11807         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11808 #else
11809         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11810         return;
11811 #endif
11812     }
11813     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11814     if( _xparams.flags!=0x0 )
11815         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11816     alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11817     alglib_impl::ae_state_clear(&_alglib_env_state);
11818     return;
11819 }
11820 
11821 /*************************************************************************
11822 Inversion of a Hermitian positive definite matrix which is given
11823 by Cholesky decomposition.
11824 
11825 Input parameters:
11826     A       -   Cholesky decomposition of the matrix to be inverted:
11827                 A=U'*U or A = L*L'.
11828                 Output of  HPDMatrixCholesky subroutine.
11829     N       -   size of matrix A (optional) :
11830                 * if given, only principal NxN submatrix is processed  and
11831                   overwritten. other elements are unchanged.
11832                 * if not given,  size  is  automatically  determined  from
11833                   matrix size (A must be square matrix)
11834     IsUpper -   storage type (optional):
11835                 * if True, symmetric  matrix  A  is  given  by  its  upper
11836                   triangle, and the lower triangle isn't  used/changed  by
11837                   function
11838                 * if False,  symmetric matrix  A  is  given  by  its lower
11839                   triangle, and the  upper triangle isn't used/changed  by
11840                   function
11841                 * if not given, lower half is used.
11842 
11843 Output parameters:
11844     Info    -   return code, same as in RMatrixLUInverse
11845     Rep     -   solver report, same as in RMatrixLUInverse
11846     A       -   inverse of matrix A, same as in RMatrixLUInverse
11847 
11848   ! FREE EDITION OF ALGLIB:
11849   !
11850   ! Free Edition of ALGLIB supports following important features for  this
11851   ! function:
11852   ! * C++ version: x64 SIMD support using C++ intrinsics
11853   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11854   !
11855   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11856   ! Reference Manual in order  to  find  out  how to activate SIMD support
11857   ! in ALGLIB.
11858 
11859   ! COMMERCIAL EDITION OF ALGLIB:
11860   !
11861   ! Commercial Edition of ALGLIB includes following important improvements
11862   ! of this function:
11863   ! * high-performance native backend with same C# interface (C# version)
11864   ! * multithreading support (C++ and C# versions)
11865   ! * hardware vendor (Intel) implementations of linear algebra primitives
11866   !   (C++ and C# versions, x86/x64 platform)
11867   !
11868   ! We recommend you to read 'Working with commercial version' section  of
11869   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11870   ! related features provided by commercial edition of ALGLIB.
11871 
11872   -- ALGLIB routine --
11873      10.02.2010
11874      Bochkanov Sergey
11875 *************************************************************************/
11876 #if !defined(AE_NO_EXCEPTIONS)
hpdmatrixcholeskyinverse(complex_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)11877 void hpdmatrixcholeskyinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11878 {
11879     jmp_buf _break_jump;
11880     alglib_impl::ae_state _alglib_env_state;
11881     ae_int_t n;
11882     bool isupper;
11883     if( (a.cols()!=a.rows()))
11884         _ALGLIB_CPP_EXCEPTION("Error while calling 'hpdmatrixcholeskyinverse': looks like one of arguments has wrong size");
11885     n = a.cols();
11886     isupper = false;
11887     alglib_impl::ae_state_init(&_alglib_env_state);
11888     if( setjmp(_break_jump) )
11889         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11890     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11891     if( _xparams.flags!=0x0 )
11892         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11893     alglib_impl::hpdmatrixcholeskyinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11894 
11895     alglib_impl::ae_state_clear(&_alglib_env_state);
11896     return;
11897 }
11898 #endif
11899 
11900 /*************************************************************************
11901 Inversion of a Hermitian positive definite matrix.
11902 
11903 Given an upper or lower triangle of a Hermitian positive definite matrix,
11904 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11905 depending on the input.
11906 
11907 Input parameters:
11908     A       -   matrix to be inverted (upper or lower triangle).
11909                 Array with elements [0..N-1,0..N-1].
11910     N       -   size of matrix A (optional) :
11911                 * if given, only principal NxN submatrix is processed  and
11912                   overwritten. other elements are unchanged.
11913                 * if not given,  size  is  automatically  determined  from
11914                   matrix size (A must be square matrix)
11915     IsUpper -   storage type (optional):
11916                 * if True, symmetric  matrix  A  is  given  by  its  upper
11917                   triangle, and the lower triangle isn't  used/changed  by
11918                   function
11919                 * if False,  symmetric matrix  A  is  given  by  its lower
11920                   triangle, and the  upper triangle isn't used/changed  by
11921                   function
11922                 * if not given,  both lower and upper  triangles  must  be
11923                   filled.
11924 
11925 Output parameters:
11926     Info    -   return code, same as in RMatrixLUInverse
11927     Rep     -   solver report, same as in RMatrixLUInverse
11928     A       -   inverse of matrix A, same as in RMatrixLUInverse
11929 
11930   ! FREE EDITION OF ALGLIB:
11931   !
11932   ! Free Edition of ALGLIB supports following important features for  this
11933   ! function:
11934   ! * C++ version: x64 SIMD support using C++ intrinsics
11935   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
11936   !
11937   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
11938   ! Reference Manual in order  to  find  out  how to activate SIMD support
11939   ! in ALGLIB.
11940 
11941   ! COMMERCIAL EDITION OF ALGLIB:
11942   !
11943   ! Commercial Edition of ALGLIB includes following important improvements
11944   ! of this function:
11945   ! * high-performance native backend with same C# interface (C# version)
11946   ! * multithreading support (C++ and C# versions)
11947   ! * hardware vendor (Intel) implementations of linear algebra primitives
11948   !   (C++ and C# versions, x86/x64 platform)
11949   !
11950   ! We recommend you to read 'Working with commercial version' section  of
11951   ! ALGLIB Reference Manual in order to find out how to  use  performance-
11952   ! related features provided by commercial edition of ALGLIB.
11953 
11954   -- ALGLIB routine --
11955      10.02.2010
11956      Bochkanov Sergey
11957 *************************************************************************/
hpdmatrixinverse(complex_2d_array & a,const ae_int_t n,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)11958 void hpdmatrixinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
11959 {
11960     jmp_buf _break_jump;
11961     alglib_impl::ae_state _alglib_env_state;
11962     alglib_impl::ae_state_init(&_alglib_env_state);
11963     if( setjmp(_break_jump) )
11964     {
11965 #if !defined(AE_NO_EXCEPTIONS)
11966         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11967 #else
11968         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11969         return;
11970 #endif
11971     }
11972     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11973     if( _xparams.flags!=0x0 )
11974         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11975     alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
11976     alglib_impl::ae_state_clear(&_alglib_env_state);
11977     return;
11978 }
11979 
11980 /*************************************************************************
11981 Inversion of a Hermitian positive definite matrix.
11982 
11983 Given an upper or lower triangle of a Hermitian positive definite matrix,
11984 the algorithm generates matrix A^-1 and saves the upper or lower triangle
11985 depending on the input.
11986 
11987 Input parameters:
11988     A       -   matrix to be inverted (upper or lower triangle).
11989                 Array with elements [0..N-1,0..N-1].
11990     N       -   size of matrix A (optional) :
11991                 * if given, only principal NxN submatrix is processed  and
11992                   overwritten. other elements are unchanged.
11993                 * if not given,  size  is  automatically  determined  from
11994                   matrix size (A must be square matrix)
11995     IsUpper -   storage type (optional):
11996                 * if True, symmetric  matrix  A  is  given  by  its  upper
11997                   triangle, and the lower triangle isn't  used/changed  by
11998                   function
11999                 * if False,  symmetric matrix  A  is  given  by  its lower
12000                   triangle, and the  upper triangle isn't used/changed  by
12001                   function
12002                 * if not given,  both lower and upper  triangles  must  be
12003                   filled.
12004 
12005 Output parameters:
12006     Info    -   return code, same as in RMatrixLUInverse
12007     Rep     -   solver report, same as in RMatrixLUInverse
12008     A       -   inverse of matrix A, same as in RMatrixLUInverse
12009 
12010   ! FREE EDITION OF ALGLIB:
12011   !
12012   ! Free Edition of ALGLIB supports following important features for  this
12013   ! function:
12014   ! * C++ version: x64 SIMD support using C++ intrinsics
12015   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12016   !
12017   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12018   ! Reference Manual in order  to  find  out  how to activate SIMD support
12019   ! in ALGLIB.
12020 
12021   ! COMMERCIAL EDITION OF ALGLIB:
12022   !
12023   ! Commercial Edition of ALGLIB includes following important improvements
12024   ! of this function:
12025   ! * high-performance native backend with same C# interface (C# version)
12026   ! * multithreading support (C++ and C# versions)
12027   ! * hardware vendor (Intel) implementations of linear algebra primitives
12028   !   (C++ and C# versions, x86/x64 platform)
12029   !
12030   ! We recommend you to read 'Working with commercial version' section  of
12031   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12032   ! related features provided by commercial edition of ALGLIB.
12033 
12034   -- ALGLIB routine --
12035      10.02.2010
12036      Bochkanov Sergey
12037 *************************************************************************/
12038 #if !defined(AE_NO_EXCEPTIONS)
hpdmatrixinverse(complex_2d_array & a,ae_int_t & info,matinvreport & rep,const xparams _xparams)12039 void hpdmatrixinverse(complex_2d_array &a, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12040 {
12041     jmp_buf _break_jump;
12042     alglib_impl::ae_state _alglib_env_state;
12043     ae_int_t n;
12044     bool isupper;
12045     if( (a.cols()!=a.rows()))
12046         _ALGLIB_CPP_EXCEPTION("Error while calling 'hpdmatrixinverse': looks like one of arguments has wrong size");
12047     if( !alglib_impl::ae_is_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
12048         _ALGLIB_CPP_EXCEPTION("'a' parameter is not Hermitian matrix");
12049     n = a.cols();
12050     isupper = false;
12051     alglib_impl::ae_state_init(&_alglib_env_state);
12052     if( setjmp(_break_jump) )
12053         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12054     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12055     if( _xparams.flags!=0x0 )
12056         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12057     alglib_impl::hpdmatrixinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12058         if( !alglib_impl::ae_force_hermitian(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
12059             _ALGLIB_CPP_EXCEPTION("Internal error while forcing Hermitian properties of 'a' parameter");
12060     alglib_impl::ae_state_clear(&_alglib_env_state);
12061     return;
12062 }
12063 #endif
12064 
12065 /*************************************************************************
12066 Triangular matrix inverse (real)
12067 
12068 The subroutine inverts the following types of matrices:
12069     * upper triangular
12070     * upper triangular with unit diagonal
12071     * lower triangular
12072     * lower triangular with unit diagonal
12073 
12074 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12075 also be upper (lower) triangular, and after the end of the algorithm,  the
12076 inverse matrix replaces the source matrix. The elements  below (above) the
12077 main diagonal are not changed by the algorithm.
12078 
12079 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12080 diagonal, and the diagonal elements are not passed to the algorithm.
12081 
12082 Input parameters:
12083     A       -   matrix, array[0..N-1, 0..N-1].
12084     N       -   size of matrix A (optional) :
12085                 * if given, only principal NxN submatrix is processed  and
12086                   overwritten. other elements are unchanged.
12087                 * if not given,  size  is  automatically  determined  from
12088                   matrix size (A must be square matrix)
12089     IsUpper -   True, if the matrix is upper triangular.
12090     IsUnit  -   diagonal type (optional):
12091                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12092                 * if False, matrix diagonal is arbitrary
12093                 * if not given, False is assumed
12094 
12095 Output parameters:
12096     Info    -   same as for RMatrixLUInverse
12097     Rep     -   same as for RMatrixLUInverse
12098     A       -   same as for RMatrixLUInverse.
12099 
12100   ! FREE EDITION OF ALGLIB:
12101   !
12102   ! Free Edition of ALGLIB supports following important features for  this
12103   ! function:
12104   ! * C++ version: x64 SIMD support using C++ intrinsics
12105   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12106   !
12107   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12108   ! Reference Manual in order  to  find  out  how to activate SIMD support
12109   ! in ALGLIB.
12110 
12111   ! COMMERCIAL EDITION OF ALGLIB:
12112   !
12113   ! Commercial Edition of ALGLIB includes following important improvements
12114   ! of this function:
12115   ! * high-performance native backend with same C# interface (C# version)
12116   ! * multithreading support (C++ and C# versions)
12117   ! * hardware vendor (Intel) implementations of linear algebra primitives
12118   !   (C++ and C# versions, x86/x64 platform)
12119   !
12120   ! We recommend you to read 'Working with commercial version' section  of
12121   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12122   ! related features provided by commercial edition of ALGLIB.
12123 
12124   -- ALGLIB --
12125      Copyright 05.02.2010 by Bochkanov Sergey
12126 *************************************************************************/
rmatrixtrinverse(real_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,ae_int_t & info,matinvreport & rep,const xparams _xparams)12127 void rmatrixtrinverse(real_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12128 {
12129     jmp_buf _break_jump;
12130     alglib_impl::ae_state _alglib_env_state;
12131     alglib_impl::ae_state_init(&_alglib_env_state);
12132     if( setjmp(_break_jump) )
12133     {
12134 #if !defined(AE_NO_EXCEPTIONS)
12135         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12136 #else
12137         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12138         return;
12139 #endif
12140     }
12141     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12142     if( _xparams.flags!=0x0 )
12143         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12144     alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12145     alglib_impl::ae_state_clear(&_alglib_env_state);
12146     return;
12147 }
12148 
12149 /*************************************************************************
12150 Triangular matrix inverse (real)
12151 
12152 The subroutine inverts the following types of matrices:
12153     * upper triangular
12154     * upper triangular with unit diagonal
12155     * lower triangular
12156     * lower triangular with unit diagonal
12157 
12158 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12159 also be upper (lower) triangular, and after the end of the algorithm,  the
12160 inverse matrix replaces the source matrix. The elements  below (above) the
12161 main diagonal are not changed by the algorithm.
12162 
12163 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12164 diagonal, and the diagonal elements are not passed to the algorithm.
12165 
12166 Input parameters:
12167     A       -   matrix, array[0..N-1, 0..N-1].
12168     N       -   size of matrix A (optional) :
12169                 * if given, only principal NxN submatrix is processed  and
12170                   overwritten. other elements are unchanged.
12171                 * if not given,  size  is  automatically  determined  from
12172                   matrix size (A must be square matrix)
12173     IsUpper -   True, if the matrix is upper triangular.
12174     IsUnit  -   diagonal type (optional):
12175                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12176                 * if False, matrix diagonal is arbitrary
12177                 * if not given, False is assumed
12178 
12179 Output parameters:
12180     Info    -   same as for RMatrixLUInverse
12181     Rep     -   same as for RMatrixLUInverse
12182     A       -   same as for RMatrixLUInverse.
12183 
12184   ! FREE EDITION OF ALGLIB:
12185   !
12186   ! Free Edition of ALGLIB supports following important features for  this
12187   ! function:
12188   ! * C++ version: x64 SIMD support using C++ intrinsics
12189   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12190   !
12191   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12192   ! Reference Manual in order  to  find  out  how to activate SIMD support
12193   ! in ALGLIB.
12194 
12195   ! COMMERCIAL EDITION OF ALGLIB:
12196   !
12197   ! Commercial Edition of ALGLIB includes following important improvements
12198   ! of this function:
12199   ! * high-performance native backend with same C# interface (C# version)
12200   ! * multithreading support (C++ and C# versions)
12201   ! * hardware vendor (Intel) implementations of linear algebra primitives
12202   !   (C++ and C# versions, x86/x64 platform)
12203   !
12204   ! We recommend you to read 'Working with commercial version' section  of
12205   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12206   ! related features provided by commercial edition of ALGLIB.
12207 
12208   -- ALGLIB --
12209      Copyright 05.02.2010 by Bochkanov Sergey
12210 *************************************************************************/
12211 #if !defined(AE_NO_EXCEPTIONS)
rmatrixtrinverse(real_2d_array & a,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)12212 void rmatrixtrinverse(real_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12213 {
12214     jmp_buf _break_jump;
12215     alglib_impl::ae_state _alglib_env_state;
12216     ae_int_t n;
12217     bool isunit;
12218     if( (a.cols()!=a.rows()))
12219         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixtrinverse': looks like one of arguments has wrong size");
12220     n = a.cols();
12221     isunit = false;
12222     alglib_impl::ae_state_init(&_alglib_env_state);
12223     if( setjmp(_break_jump) )
12224         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12225     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12226     if( _xparams.flags!=0x0 )
12227         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12228     alglib_impl::rmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12229 
12230     alglib_impl::ae_state_clear(&_alglib_env_state);
12231     return;
12232 }
12233 #endif
12234 
12235 /*************************************************************************
12236 Triangular matrix inverse (complex)
12237 
12238 The subroutine inverts the following types of matrices:
12239     * upper triangular
12240     * upper triangular with unit diagonal
12241     * lower triangular
12242     * lower triangular with unit diagonal
12243 
12244 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12245 also be upper (lower) triangular, and after the end of the algorithm,  the
12246 inverse matrix replaces the source matrix. The elements  below (above) the
12247 main diagonal are not changed by the algorithm.
12248 
12249 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12250 diagonal, and the diagonal elements are not passed to the algorithm.
12251 
12252 Input parameters:
12253     A       -   matrix, array[0..N-1, 0..N-1].
12254     N       -   size of matrix A (optional) :
12255                 * if given, only principal NxN submatrix is processed  and
12256                   overwritten. other elements are unchanged.
12257                 * if not given,  size  is  automatically  determined  from
12258                   matrix size (A must be square matrix)
12259     IsUpper -   True, if the matrix is upper triangular.
12260     IsUnit  -   diagonal type (optional):
12261                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12262                 * if False, matrix diagonal is arbitrary
12263                 * if not given, False is assumed
12264 
12265 Output parameters:
12266     Info    -   same as for RMatrixLUInverse
12267     Rep     -   same as for RMatrixLUInverse
12268     A       -   same as for RMatrixLUInverse.
12269 
12270   ! FREE EDITION OF ALGLIB:
12271   !
12272   ! Free Edition of ALGLIB supports following important features for  this
12273   ! function:
12274   ! * C++ version: x64 SIMD support using C++ intrinsics
12275   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12276   !
12277   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12278   ! Reference Manual in order  to  find  out  how to activate SIMD support
12279   ! in ALGLIB.
12280 
12281   ! COMMERCIAL EDITION OF ALGLIB:
12282   !
12283   ! Commercial Edition of ALGLIB includes following important improvements
12284   ! of this function:
12285   ! * high-performance native backend with same C# interface (C# version)
12286   ! * multithreading support (C++ and C# versions)
12287   ! * hardware vendor (Intel) implementations of linear algebra primitives
12288   !   (C++ and C# versions, x86/x64 platform)
12289   !
12290   ! We recommend you to read 'Working with commercial version' section  of
12291   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12292   ! related features provided by commercial edition of ALGLIB.
12293 
12294   -- ALGLIB --
12295      Copyright 05.02.2010 by Bochkanov Sergey
12296 *************************************************************************/
cmatrixtrinverse(complex_2d_array & a,const ae_int_t n,const bool isupper,const bool isunit,ae_int_t & info,matinvreport & rep,const xparams _xparams)12297 void cmatrixtrinverse(complex_2d_array &a, const ae_int_t n, const bool isupper, const bool isunit, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12298 {
12299     jmp_buf _break_jump;
12300     alglib_impl::ae_state _alglib_env_state;
12301     alglib_impl::ae_state_init(&_alglib_env_state);
12302     if( setjmp(_break_jump) )
12303     {
12304 #if !defined(AE_NO_EXCEPTIONS)
12305         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12306 #else
12307         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12308         return;
12309 #endif
12310     }
12311     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12312     if( _xparams.flags!=0x0 )
12313         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12314     alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12315     alglib_impl::ae_state_clear(&_alglib_env_state);
12316     return;
12317 }
12318 
12319 /*************************************************************************
12320 Triangular matrix inverse (complex)
12321 
12322 The subroutine inverts the following types of matrices:
12323     * upper triangular
12324     * upper triangular with unit diagonal
12325     * lower triangular
12326     * lower triangular with unit diagonal
12327 
12328 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
12329 also be upper (lower) triangular, and after the end of the algorithm,  the
12330 inverse matrix replaces the source matrix. The elements  below (above) the
12331 main diagonal are not changed by the algorithm.
12332 
12333 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
12334 diagonal, and the diagonal elements are not passed to the algorithm.
12335 
12336 Input parameters:
12337     A       -   matrix, array[0..N-1, 0..N-1].
12338     N       -   size of matrix A (optional) :
12339                 * if given, only principal NxN submatrix is processed  and
12340                   overwritten. other elements are unchanged.
12341                 * if not given,  size  is  automatically  determined  from
12342                   matrix size (A must be square matrix)
12343     IsUpper -   True, if the matrix is upper triangular.
12344     IsUnit  -   diagonal type (optional):
12345                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
12346                 * if False, matrix diagonal is arbitrary
12347                 * if not given, False is assumed
12348 
12349 Output parameters:
12350     Info    -   same as for RMatrixLUInverse
12351     Rep     -   same as for RMatrixLUInverse
12352     A       -   same as for RMatrixLUInverse.
12353 
12354   ! FREE EDITION OF ALGLIB:
12355   !
12356   ! Free Edition of ALGLIB supports following important features for  this
12357   ! function:
12358   ! * C++ version: x64 SIMD support using C++ intrinsics
12359   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
12360   !
12361   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
12362   ! Reference Manual in order  to  find  out  how to activate SIMD support
12363   ! in ALGLIB.
12364 
12365   ! COMMERCIAL EDITION OF ALGLIB:
12366   !
12367   ! Commercial Edition of ALGLIB includes following important improvements
12368   ! of this function:
12369   ! * high-performance native backend with same C# interface (C# version)
12370   ! * multithreading support (C++ and C# versions)
12371   ! * hardware vendor (Intel) implementations of linear algebra primitives
12372   !   (C++ and C# versions, x86/x64 platform)
12373   !
12374   ! We recommend you to read 'Working with commercial version' section  of
12375   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12376   ! related features provided by commercial edition of ALGLIB.
12377 
12378   -- ALGLIB --
12379      Copyright 05.02.2010 by Bochkanov Sergey
12380 *************************************************************************/
12381 #if !defined(AE_NO_EXCEPTIONS)
cmatrixtrinverse(complex_2d_array & a,const bool isupper,ae_int_t & info,matinvreport & rep,const xparams _xparams)12382 void cmatrixtrinverse(complex_2d_array &a, const bool isupper, ae_int_t &info, matinvreport &rep, const xparams _xparams)
12383 {
12384     jmp_buf _break_jump;
12385     alglib_impl::ae_state _alglib_env_state;
12386     ae_int_t n;
12387     bool isunit;
12388     if( (a.cols()!=a.rows()))
12389         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixtrinverse': looks like one of arguments has wrong size");
12390     n = a.cols();
12391     isunit = false;
12392     alglib_impl::ae_state_init(&_alglib_env_state);
12393     if( setjmp(_break_jump) )
12394         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12395     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12396     if( _xparams.flags!=0x0 )
12397         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12398     alglib_impl::cmatrixtrinverse(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, isunit, &info, const_cast<alglib_impl::matinvreport*>(rep.c_ptr()), &_alglib_env_state);
12399 
12400     alglib_impl::ae_state_clear(&_alglib_env_state);
12401     return;
12402 }
12403 #endif
12404 #endif
12405 
12406 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
12407 /*************************************************************************
12408 Inverse matrix update by the Sherman-Morrison formula
12409 
12410 The algorithm updates matrix A^-1 when adding a number to an element
12411 of matrix A.
12412 
12413 Input parameters:
12414     InvA    -   inverse of matrix A.
12415                 Array whose indexes range within [0..N-1, 0..N-1].
12416     N       -   size of matrix A.
12417     UpdRow  -   row where the element to be updated is stored.
12418     UpdColumn - column where the element to be updated is stored.
12419     UpdVal  -   a number to be added to the element.
12420 
12421 
12422 Output parameters:
12423     InvA    -   inverse of modified matrix A.
12424 
12425   -- ALGLIB --
12426      Copyright 2005 by Bochkanov Sergey
12427 *************************************************************************/
rmatrixinvupdatesimple(real_2d_array & inva,const ae_int_t n,const ae_int_t updrow,const ae_int_t updcolumn,const double updval,const xparams _xparams)12428 void rmatrixinvupdatesimple(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const ae_int_t updcolumn, const double updval, const xparams _xparams)
12429 {
12430     jmp_buf _break_jump;
12431     alglib_impl::ae_state _alglib_env_state;
12432     alglib_impl::ae_state_init(&_alglib_env_state);
12433     if( setjmp(_break_jump) )
12434     {
12435 #if !defined(AE_NO_EXCEPTIONS)
12436         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12437 #else
12438         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12439         return;
12440 #endif
12441     }
12442     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12443     if( _xparams.flags!=0x0 )
12444         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12445     alglib_impl::rmatrixinvupdatesimple(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, updcolumn, updval, &_alglib_env_state);
12446     alglib_impl::ae_state_clear(&_alglib_env_state);
12447     return;
12448 }
12449 
12450 /*************************************************************************
12451 Inverse matrix update by the Sherman-Morrison formula
12452 
12453 The algorithm updates matrix A^-1 when adding a vector to a row
12454 of matrix A.
12455 
12456 Input parameters:
12457     InvA    -   inverse of matrix A.
12458                 Array whose indexes range within [0..N-1, 0..N-1].
12459     N       -   size of matrix A.
12460     UpdRow  -   the row of A whose vector V was added.
12461                 0 <= Row <= N-1
12462     V       -   the vector to be added to a row.
12463                 Array whose index ranges within [0..N-1].
12464 
12465 Output parameters:
12466     InvA    -   inverse of modified matrix A.
12467 
12468   -- ALGLIB --
12469      Copyright 2005 by Bochkanov Sergey
12470 *************************************************************************/
rmatrixinvupdaterow(real_2d_array & inva,const ae_int_t n,const ae_int_t updrow,const real_1d_array & v,const xparams _xparams)12471 void rmatrixinvupdaterow(real_2d_array &inva, const ae_int_t n, const ae_int_t updrow, const real_1d_array &v, const xparams _xparams)
12472 {
12473     jmp_buf _break_jump;
12474     alglib_impl::ae_state _alglib_env_state;
12475     alglib_impl::ae_state_init(&_alglib_env_state);
12476     if( setjmp(_break_jump) )
12477     {
12478 #if !defined(AE_NO_EXCEPTIONS)
12479         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12480 #else
12481         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12482         return;
12483 #endif
12484     }
12485     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12486     if( _xparams.flags!=0x0 )
12487         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12488     alglib_impl::rmatrixinvupdaterow(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updrow, const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state);
12489     alglib_impl::ae_state_clear(&_alglib_env_state);
12490     return;
12491 }
12492 
12493 /*************************************************************************
12494 Inverse matrix update by the Sherman-Morrison formula
12495 
12496 The algorithm updates matrix A^-1 when adding a vector to a column
12497 of matrix A.
12498 
12499 Input parameters:
12500     InvA        -   inverse of matrix A.
12501                     Array whose indexes range within [0..N-1, 0..N-1].
12502     N           -   size of matrix A.
12503     UpdColumn   -   the column of A whose vector U was added.
12504                     0 <= UpdColumn <= N-1
12505     U           -   the vector to be added to a column.
12506                     Array whose index ranges within [0..N-1].
12507 
12508 Output parameters:
12509     InvA        -   inverse of modified matrix A.
12510 
12511   -- ALGLIB --
12512      Copyright 2005 by Bochkanov Sergey
12513 *************************************************************************/
rmatrixinvupdatecolumn(real_2d_array & inva,const ae_int_t n,const ae_int_t updcolumn,const real_1d_array & u,const xparams _xparams)12514 void rmatrixinvupdatecolumn(real_2d_array &inva, const ae_int_t n, const ae_int_t updcolumn, const real_1d_array &u, const xparams _xparams)
12515 {
12516     jmp_buf _break_jump;
12517     alglib_impl::ae_state _alglib_env_state;
12518     alglib_impl::ae_state_init(&_alglib_env_state);
12519     if( setjmp(_break_jump) )
12520     {
12521 #if !defined(AE_NO_EXCEPTIONS)
12522         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12523 #else
12524         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12525         return;
12526 #endif
12527     }
12528     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12529     if( _xparams.flags!=0x0 )
12530         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12531     alglib_impl::rmatrixinvupdatecolumn(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, updcolumn, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), &_alglib_env_state);
12532     alglib_impl::ae_state_clear(&_alglib_env_state);
12533     return;
12534 }
12535 
12536 /*************************************************************************
12537 Inverse matrix update by the Sherman-Morrison formula
12538 
12539 The algorithm computes the inverse of matrix A+u*v' by using the given matrix
12540 A^-1 and the vectors u and v.
12541 
12542 Input parameters:
12543     InvA    -   inverse of matrix A.
12544                 Array whose indexes range within [0..N-1, 0..N-1].
12545     N       -   size of matrix A.
12546     U       -   the vector modifying the matrix.
12547                 Array whose index ranges within [0..N-1].
12548     V       -   the vector modifying the matrix.
12549                 Array whose index ranges within [0..N-1].
12550 
12551 Output parameters:
12552     InvA - inverse of matrix A + u*v'.
12553 
12554   -- ALGLIB --
12555      Copyright 2005 by Bochkanov Sergey
12556 *************************************************************************/
rmatrixinvupdateuv(real_2d_array & inva,const ae_int_t n,const real_1d_array & u,const real_1d_array & v,const xparams _xparams)12557 void rmatrixinvupdateuv(real_2d_array &inva, const ae_int_t n, const real_1d_array &u, const real_1d_array &v, const xparams _xparams)
12558 {
12559     jmp_buf _break_jump;
12560     alglib_impl::ae_state _alglib_env_state;
12561     alglib_impl::ae_state_init(&_alglib_env_state);
12562     if( setjmp(_break_jump) )
12563     {
12564 #if !defined(AE_NO_EXCEPTIONS)
12565         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12566 #else
12567         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12568         return;
12569 #endif
12570     }
12571     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12572     if( _xparams.flags!=0x0 )
12573         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12574     alglib_impl::rmatrixinvupdateuv(const_cast<alglib_impl::ae_matrix*>(inva.c_ptr()), n, const_cast<alglib_impl::ae_vector*>(u.c_ptr()), const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &_alglib_env_state);
12575     alglib_impl::ae_state_clear(&_alglib_env_state);
12576     return;
12577 }
12578 #endif
12579 
12580 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
12581 /*************************************************************************
12582 Subroutine performing the Schur decomposition of a general matrix by using
12583 the QR algorithm with multiple shifts.
12584 
12585 COMMERCIAL EDITION OF ALGLIB:
12586 
12587   ! Commercial version of ALGLIB includes one  important  improvement   of
12588   ! this function, which can be used from C++ and C#:
12589   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
12590   !
12591   ! Intel MKL gives approximately constant  (with  respect  to  number  of
12592   ! worker threads) acceleration factor which depends on CPU  being  used,
12593   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
12594   ! comparison.
12595   !
12596   ! Multithreaded acceleration is NOT supported for this function.
12597   !
12598   ! We recommend you to read 'Working with commercial version' section  of
12599   ! ALGLIB Reference Manual in order to find out how to  use  performance-
12600   ! related features provided by commercial edition of ALGLIB.
12601 
12602 The source matrix A is represented as S'*A*S = T, where S is an orthogonal
12603 matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
12604 sizes 1x1 and 2x2 on the main diagonal).
12605 
12606 Input parameters:
12607     A   -   matrix to be decomposed.
12608             Array whose indexes range within [0..N-1, 0..N-1].
12609     N   -   size of A, N>=0.
12610 
12611 
12612 Output parameters:
12613     A   -   contains matrix T.
12614             Array whose indexes range within [0..N-1, 0..N-1].
12615     S   -   contains Schur vectors.
12616             Array whose indexes range within [0..N-1, 0..N-1].
12617 
12618 Note 1:
12619     The block structure of matrix T can be easily recognized: since all
12620     the elements below the blocks are zeros, the elements a[i+1,i] which
12621     are equal to 0 show the block border.
12622 
12623 Note 2:
12624     The algorithm performance depends on the value of the internal parameter
12625     NS of the InternalSchurDecomposition subroutine which defines the number
12626     of shifts in the QR algorithm (similarly to the block width in block-matrix
12627     algorithms in linear algebra). If you require maximum performance on
12628     your machine, it is recommended to adjust this parameter manually.
12629 
12630 Result:
12631     True,
12632         if the algorithm has converged and parameters A and S contain the result.
12633     False,
12634         if the algorithm has not converged.
12635 
12636 Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
12637 *************************************************************************/
rmatrixschur(real_2d_array & a,const ae_int_t n,real_2d_array & s,const xparams _xparams)12638 bool rmatrixschur(real_2d_array &a, const ae_int_t n, real_2d_array &s, const xparams _xparams)
12639 {
12640     jmp_buf _break_jump;
12641     alglib_impl::ae_state _alglib_env_state;
12642     alglib_impl::ae_state_init(&_alglib_env_state);
12643     if( setjmp(_break_jump) )
12644     {
12645 #if !defined(AE_NO_EXCEPTIONS)
12646         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12647 #else
12648         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12649         return 0;
12650 #endif
12651     }
12652     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12653     if( _xparams.flags!=0x0 )
12654         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12655     ae_bool result = alglib_impl::rmatrixschur(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, const_cast<alglib_impl::ae_matrix*>(s.c_ptr()), &_alglib_env_state);
12656     alglib_impl::ae_state_clear(&_alglib_env_state);
12657     return *(reinterpret_cast<bool*>(&result));
12658 }
12659 #endif
12660 
12661 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
12662 /*************************************************************************
12663 Algorithm for solving the following generalized symmetric positive-definite
12664 eigenproblem:
12665     A*x = lambda*B*x (1) or
12666     A*B*x = lambda*x (2) or
12667     B*A*x = lambda*x (3).
12668 where A is a symmetric matrix, B - symmetric positive-definite matrix.
12669 The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
12670 problem.
12671 
12672 Input parameters:
12673     A           -   symmetric matrix which is given by its upper or lower
12674                     triangular part.
12675                     Array whose indexes range within [0..N-1, 0..N-1].
12676     N           -   size of matrices A and B.
12677     IsUpperA    -   storage format of matrix A.
12678     B           -   symmetric positive-definite matrix which is given by
12679                     its upper or lower triangular part.
12680                     Array whose indexes range within [0..N-1, 0..N-1].
12681     IsUpperB    -   storage format of matrix B.
12682     ZNeeded     -   if ZNeeded is equal to:
12683                      * 0, the eigenvectors are not returned;
12684                      * 1, the eigenvectors are returned.
12685     ProblemType -   if ProblemType is equal to:
12686                      * 1, the following problem is solved: A*x = lambda*B*x;
12687                      * 2, the following problem is solved: A*B*x = lambda*x;
12688                      * 3, the following problem is solved: B*A*x = lambda*x.
12689 
12690 Output parameters:
12691     D           -   eigenvalues in ascending order.
12692                     Array whose index ranges within [0..N-1].
12693     Z           -   if ZNeeded is equal to:
12694                      * 0, Z hasn't changed;
12695                      * 1, Z contains eigenvectors.
12696                     Array whose indexes range within [0..N-1, 0..N-1].
12697                     The eigenvectors are stored in matrix columns. It should
12698                     be noted that the eigenvectors in such problems do not
12699                     form an orthogonal system.
12700 
12701 Result:
12702     True, if the problem was solved successfully.
12703     False, if the error occurred during the Cholesky decomposition of matrix
12704     B (the matrix isn't positive-definite) or during the work of the iterative
12705     algorithm for solving the symmetric eigenproblem.
12706 
12707 See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
12708 
12709   -- ALGLIB --
12710      Copyright 1.28.2006 by Bochkanov Sergey
12711 *************************************************************************/
smatrixgevd(const real_2d_array & a,const ae_int_t n,const bool isuppera,const real_2d_array & b,const bool isupperb,const ae_int_t zneeded,const ae_int_t problemtype,real_1d_array & d,real_2d_array & z,const xparams _xparams)12712 bool smatrixgevd(const real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t zneeded, const ae_int_t problemtype, real_1d_array &d, real_2d_array &z, const xparams _xparams)
12713 {
12714     jmp_buf _break_jump;
12715     alglib_impl::ae_state _alglib_env_state;
12716     alglib_impl::ae_state_init(&_alglib_env_state);
12717     if( setjmp(_break_jump) )
12718     {
12719 #if !defined(AE_NO_EXCEPTIONS)
12720         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12721 #else
12722         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12723         return 0;
12724 #endif
12725     }
12726     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12727     if( _xparams.flags!=0x0 )
12728         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12729     ae_bool result = alglib_impl::smatrixgevd(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, zneeded, problemtype, const_cast<alglib_impl::ae_vector*>(d.c_ptr()), const_cast<alglib_impl::ae_matrix*>(z.c_ptr()), &_alglib_env_state);
12730     alglib_impl::ae_state_clear(&_alglib_env_state);
12731     return *(reinterpret_cast<bool*>(&result));
12732 }
12733 
12734 /*************************************************************************
12735 Algorithm for reduction of the following generalized symmetric positive-
12736 definite eigenvalue problem:
12737     A*x = lambda*B*x (1) or
12738     A*B*x = lambda*x (2) or
12739     B*A*x = lambda*x (3)
12740 to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
12741 the given problems are the same, and the eigenvectors of the given problem
12742 could be obtained by multiplying the obtained eigenvectors by the
12743 transformation matrix x = R*y).
12744 
12745 Here A is a symmetric matrix, B - symmetric positive-definite matrix.
12746 
12747 Input parameters:
12748     A           -   symmetric matrix which is given by its upper or lower
12749                     triangular part.
12750                     Array whose indexes range within [0..N-1, 0..N-1].
12751     N           -   size of matrices A and B.
12752     IsUpperA    -   storage format of matrix A.
12753     B           -   symmetric positive-definite matrix which is given by
12754                     its upper or lower triangular part.
12755                     Array whose indexes range within [0..N-1, 0..N-1].
12756     IsUpperB    -   storage format of matrix B.
12757     ProblemType -   if ProblemType is equal to:
12758                      * 1, the following problem is solved: A*x = lambda*B*x;
12759                      * 2, the following problem is solved: A*B*x = lambda*x;
12760                      * 3, the following problem is solved: B*A*x = lambda*x.
12761 
12762 Output parameters:
12763     A           -   symmetric matrix which is given by its upper or lower
12764                     triangle depending on IsUpperA. Contains matrix C.
12765                     Array whose indexes range within [0..N-1, 0..N-1].
12766     R           -   upper triangular or low triangular transformation matrix
12767                     which is used to obtain the eigenvectors of a given problem
12768                     as the product of eigenvectors of C (from the right) and
12769                     matrix R (from the left). If the matrix is upper
12770                     triangular, the elements below the main diagonal
12771                     are equal to 0 (and vice versa). Thus, we can perform
12772                     the multiplication without taking into account the
12773                     internal structure (which is an easier though less
12774                     effective way).
12775                     Array whose indexes range within [0..N-1, 0..N-1].
12776     IsUpperR    -   type of matrix R (upper or lower triangular).
12777 
12778 Result:
12779     True, if the problem was reduced successfully.
12780     False, if the error occurred during the Cholesky decomposition of
12781         matrix B (the matrix is not positive-definite).
12782 
12783   -- ALGLIB --
12784      Copyright 1.28.2006 by Bochkanov Sergey
12785 *************************************************************************/
smatrixgevdreduce(real_2d_array & a,const ae_int_t n,const bool isuppera,const real_2d_array & b,const bool isupperb,const ae_int_t problemtype,real_2d_array & r,bool & isupperr,const xparams _xparams)12786 bool smatrixgevdreduce(real_2d_array &a, const ae_int_t n, const bool isuppera, const real_2d_array &b, const bool isupperb, const ae_int_t problemtype, real_2d_array &r, bool &isupperr, const xparams _xparams)
12787 {
12788     jmp_buf _break_jump;
12789     alglib_impl::ae_state _alglib_env_state;
12790     alglib_impl::ae_state_init(&_alglib_env_state);
12791     if( setjmp(_break_jump) )
12792     {
12793 #if !defined(AE_NO_EXCEPTIONS)
12794         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12795 #else
12796         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12797         return 0;
12798 #endif
12799     }
12800     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12801     if( _xparams.flags!=0x0 )
12802         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12803     ae_bool result = alglib_impl::smatrixgevdreduce(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isuppera, const_cast<alglib_impl::ae_matrix*>(b.c_ptr()), isupperb, problemtype, const_cast<alglib_impl::ae_matrix*>(r.c_ptr()), &isupperr, &_alglib_env_state);
12804     alglib_impl::ae_state_clear(&_alglib_env_state);
12805     return *(reinterpret_cast<bool*>(&result));
12806 }
12807 #endif
12808 
12809 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
12810 /*************************************************************************
12811 Determinant calculation of the matrix given by its LU decomposition.
12812 
12813 Input parameters:
12814     A       -   LU decomposition of the matrix (output of
12815                 RMatrixLU subroutine).
12816     Pivots  -   table of permutations which were made during
12817                 the LU decomposition.
12818                 Output of RMatrixLU subroutine.
12819     N       -   (optional) size of matrix A:
12820                 * if given, only principal NxN submatrix is processed and
12821                   overwritten. other elements are unchanged.
12822                 * if not given, automatically determined from matrix size
12823                   (A must be square matrix)
12824 
12825 Result: matrix determinant.
12826 
12827   -- ALGLIB --
12828      Copyright 2005 by Bochkanov Sergey
12829 *************************************************************************/
rmatrixludet(const real_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,const xparams _xparams)12830 double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, const xparams _xparams)
12831 {
12832     jmp_buf _break_jump;
12833     alglib_impl::ae_state _alglib_env_state;
12834     alglib_impl::ae_state_init(&_alglib_env_state);
12835     if( setjmp(_break_jump) )
12836     {
12837 #if !defined(AE_NO_EXCEPTIONS)
12838         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12839 #else
12840         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12841         return 0;
12842 #endif
12843     }
12844     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12845     if( _xparams.flags!=0x0 )
12846         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12847     double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
12848     alglib_impl::ae_state_clear(&_alglib_env_state);
12849     return *(reinterpret_cast<double*>(&result));
12850 }
12851 
12852 /*************************************************************************
12853 Determinant calculation of the matrix given by its LU decomposition.
12854 
12855 Input parameters:
12856     A       -   LU decomposition of the matrix (output of
12857                 RMatrixLU subroutine).
12858     Pivots  -   table of permutations which were made during
12859                 the LU decomposition.
12860                 Output of RMatrixLU subroutine.
12861     N       -   (optional) size of matrix A:
12862                 * if given, only principal NxN submatrix is processed and
12863                   overwritten. other elements are unchanged.
12864                 * if not given, automatically determined from matrix size
12865                   (A must be square matrix)
12866 
12867 Result: matrix determinant.
12868 
12869   -- ALGLIB --
12870      Copyright 2005 by Bochkanov Sergey
12871 *************************************************************************/
12872 #if !defined(AE_NO_EXCEPTIONS)
rmatrixludet(const real_2d_array & a,const integer_1d_array & pivots,const xparams _xparams)12873 double rmatrixludet(const real_2d_array &a, const integer_1d_array &pivots, const xparams _xparams)
12874 {
12875     jmp_buf _break_jump;
12876     alglib_impl::ae_state _alglib_env_state;
12877     ae_int_t n;
12878     if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length()))
12879         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixludet': looks like one of arguments has wrong size");
12880     n = a.rows();
12881     alglib_impl::ae_state_init(&_alglib_env_state);
12882     if( setjmp(_break_jump) )
12883         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12884     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12885     if( _xparams.flags!=0x0 )
12886         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12887     double result = alglib_impl::rmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
12888 
12889     alglib_impl::ae_state_clear(&_alglib_env_state);
12890     return *(reinterpret_cast<double*>(&result));
12891 }
12892 #endif
12893 
12894 /*************************************************************************
12895 Calculation of the determinant of a general matrix
12896 
12897 Input parameters:
12898     A       -   matrix, array[0..N-1, 0..N-1]
12899     N       -   (optional) size of matrix A:
12900                 * if given, only principal NxN submatrix is processed and
12901                   overwritten. other elements are unchanged.
12902                 * if not given, automatically determined from matrix size
12903                   (A must be square matrix)
12904 
12905 Result: determinant of matrix A.
12906 
12907   -- ALGLIB --
12908      Copyright 2005 by Bochkanov Sergey
12909 *************************************************************************/
rmatrixdet(const real_2d_array & a,const ae_int_t n,const xparams _xparams)12910 double rmatrixdet(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
12911 {
12912     jmp_buf _break_jump;
12913     alglib_impl::ae_state _alglib_env_state;
12914     alglib_impl::ae_state_init(&_alglib_env_state);
12915     if( setjmp(_break_jump) )
12916     {
12917 #if !defined(AE_NO_EXCEPTIONS)
12918         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12919 #else
12920         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12921         return 0;
12922 #endif
12923     }
12924     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12925     if( _xparams.flags!=0x0 )
12926         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12927     double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
12928     alglib_impl::ae_state_clear(&_alglib_env_state);
12929     return *(reinterpret_cast<double*>(&result));
12930 }
12931 
12932 /*************************************************************************
12933 Calculation of the determinant of a general matrix
12934 
12935 Input parameters:
12936     A       -   matrix, array[0..N-1, 0..N-1]
12937     N       -   (optional) size of matrix A:
12938                 * if given, only principal NxN submatrix is processed and
12939                   overwritten. other elements are unchanged.
12940                 * if not given, automatically determined from matrix size
12941                   (A must be square matrix)
12942 
12943 Result: determinant of matrix A.
12944 
12945   -- ALGLIB --
12946      Copyright 2005 by Bochkanov Sergey
12947 *************************************************************************/
12948 #if !defined(AE_NO_EXCEPTIONS)
rmatrixdet(const real_2d_array & a,const xparams _xparams)12949 double rmatrixdet(const real_2d_array &a, const xparams _xparams)
12950 {
12951     jmp_buf _break_jump;
12952     alglib_impl::ae_state _alglib_env_state;
12953     ae_int_t n;
12954     if( (a.rows()!=a.cols()))
12955         _ALGLIB_CPP_EXCEPTION("Error while calling 'rmatrixdet': looks like one of arguments has wrong size");
12956     n = a.rows();
12957     alglib_impl::ae_state_init(&_alglib_env_state);
12958     if( setjmp(_break_jump) )
12959         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12960     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12961     if( _xparams.flags!=0x0 )
12962         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12963     double result = alglib_impl::rmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
12964 
12965     alglib_impl::ae_state_clear(&_alglib_env_state);
12966     return *(reinterpret_cast<double*>(&result));
12967 }
12968 #endif
12969 
12970 /*************************************************************************
12971 Determinant calculation of the matrix given by its LU decomposition.
12972 
12973 Input parameters:
12974     A       -   LU decomposition of the matrix (output of
12975                 RMatrixLU subroutine).
12976     Pivots  -   table of permutations which were made during
12977                 the LU decomposition.
12978                 Output of RMatrixLU subroutine.
12979     N       -   (optional) size of matrix A:
12980                 * if given, only principal NxN submatrix is processed and
12981                   overwritten. other elements are unchanged.
12982                 * if not given, automatically determined from matrix size
12983                   (A must be square matrix)
12984 
12985 Result: matrix determinant.
12986 
12987   -- ALGLIB --
12988      Copyright 2005 by Bochkanov Sergey
12989 *************************************************************************/
cmatrixludet(const complex_2d_array & a,const integer_1d_array & pivots,const ae_int_t n,const xparams _xparams)12990 alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const ae_int_t n, const xparams _xparams)
12991 {
12992     jmp_buf _break_jump;
12993     alglib_impl::ae_state _alglib_env_state;
12994     alglib_impl::ae_state_init(&_alglib_env_state);
12995     if( setjmp(_break_jump) )
12996     {
12997 #if !defined(AE_NO_EXCEPTIONS)
12998         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12999 #else
13000         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13001         return 0;
13002 #endif
13003     }
13004     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13005     if( _xparams.flags!=0x0 )
13006         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13007     alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
13008     alglib_impl::ae_state_clear(&_alglib_env_state);
13009     return *(reinterpret_cast<alglib::complex*>(&result));
13010 }
13011 
13012 /*************************************************************************
13013 Determinant calculation of the matrix given by its LU decomposition.
13014 
13015 Input parameters:
13016     A       -   LU decomposition of the matrix (output of
13017                 RMatrixLU subroutine).
13018     Pivots  -   table of permutations which were made during
13019                 the LU decomposition.
13020                 Output of RMatrixLU subroutine.
13021     N       -   (optional) size of matrix A:
13022                 * if given, only principal NxN submatrix is processed and
13023                   overwritten. other elements are unchanged.
13024                 * if not given, automatically determined from matrix size
13025                   (A must be square matrix)
13026 
13027 Result: matrix determinant.
13028 
13029   -- ALGLIB --
13030      Copyright 2005 by Bochkanov Sergey
13031 *************************************************************************/
13032 #if !defined(AE_NO_EXCEPTIONS)
cmatrixludet(const complex_2d_array & a,const integer_1d_array & pivots,const xparams _xparams)13033 alglib::complex cmatrixludet(const complex_2d_array &a, const integer_1d_array &pivots, const xparams _xparams)
13034 {
13035     jmp_buf _break_jump;
13036     alglib_impl::ae_state _alglib_env_state;
13037     ae_int_t n;
13038     if( (a.rows()!=a.cols()) || (a.rows()!=pivots.length()))
13039         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixludet': looks like one of arguments has wrong size");
13040     n = a.rows();
13041     alglib_impl::ae_state_init(&_alglib_env_state);
13042     if( setjmp(_break_jump) )
13043         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13044     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13045     if( _xparams.flags!=0x0 )
13046         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13047     alglib_impl::ae_complex result = alglib_impl::cmatrixludet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(pivots.c_ptr()), n, &_alglib_env_state);
13048 
13049     alglib_impl::ae_state_clear(&_alglib_env_state);
13050     return *(reinterpret_cast<alglib::complex*>(&result));
13051 }
13052 #endif
13053 
13054 /*************************************************************************
13055 Calculation of the determinant of a general matrix
13056 
13057 Input parameters:
13058     A       -   matrix, array[0..N-1, 0..N-1]
13059     N       -   (optional) size of matrix A:
13060                 * if given, only principal NxN submatrix is processed and
13061                   overwritten. other elements are unchanged.
13062                 * if not given, automatically determined from matrix size
13063                   (A must be square matrix)
13064 
13065 Result: determinant of matrix A.
13066 
13067   -- ALGLIB --
13068      Copyright 2005 by Bochkanov Sergey
13069 *************************************************************************/
cmatrixdet(const complex_2d_array & a,const ae_int_t n,const xparams _xparams)13070 alglib::complex cmatrixdet(const complex_2d_array &a, const ae_int_t n, const xparams _xparams)
13071 {
13072     jmp_buf _break_jump;
13073     alglib_impl::ae_state _alglib_env_state;
13074     alglib_impl::ae_state_init(&_alglib_env_state);
13075     if( setjmp(_break_jump) )
13076     {
13077 #if !defined(AE_NO_EXCEPTIONS)
13078         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13079 #else
13080         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13081         return 0;
13082 #endif
13083     }
13084     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13085     if( _xparams.flags!=0x0 )
13086         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13087     alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13088     alglib_impl::ae_state_clear(&_alglib_env_state);
13089     return *(reinterpret_cast<alglib::complex*>(&result));
13090 }
13091 
13092 /*************************************************************************
13093 Calculation of the determinant of a general matrix
13094 
13095 Input parameters:
13096     A       -   matrix, array[0..N-1, 0..N-1]
13097     N       -   (optional) size of matrix A:
13098                 * if given, only principal NxN submatrix is processed and
13099                   overwritten. other elements are unchanged.
13100                 * if not given, automatically determined from matrix size
13101                   (A must be square matrix)
13102 
13103 Result: determinant of matrix A.
13104 
13105   -- ALGLIB --
13106      Copyright 2005 by Bochkanov Sergey
13107 *************************************************************************/
13108 #if !defined(AE_NO_EXCEPTIONS)
cmatrixdet(const complex_2d_array & a,const xparams _xparams)13109 alglib::complex cmatrixdet(const complex_2d_array &a, const xparams _xparams)
13110 {
13111     jmp_buf _break_jump;
13112     alglib_impl::ae_state _alglib_env_state;
13113     ae_int_t n;
13114     if( (a.rows()!=a.cols()))
13115         _ALGLIB_CPP_EXCEPTION("Error while calling 'cmatrixdet': looks like one of arguments has wrong size");
13116     n = a.rows();
13117     alglib_impl::ae_state_init(&_alglib_env_state);
13118     if( setjmp(_break_jump) )
13119         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13120     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13121     if( _xparams.flags!=0x0 )
13122         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13123     alglib_impl::ae_complex result = alglib_impl::cmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13124 
13125     alglib_impl::ae_state_clear(&_alglib_env_state);
13126     return *(reinterpret_cast<alglib::complex*>(&result));
13127 }
13128 #endif
13129 
13130 /*************************************************************************
13131 Determinant calculation of the matrix given by the Cholesky decomposition.
13132 
13133 Input parameters:
13134     A       -   Cholesky decomposition,
13135                 output of SMatrixCholesky subroutine.
13136     N       -   (optional) size of matrix A:
13137                 * if given, only principal NxN submatrix is processed and
13138                   overwritten. other elements are unchanged.
13139                 * if not given, automatically determined from matrix size
13140                   (A must be square matrix)
13141 
13142 As the determinant is equal to the product of squares of diagonal elements,
13143 it's not necessary to specify which triangle - lower or upper - the matrix
13144 is stored in.
13145 
13146 Result:
13147     matrix determinant.
13148 
13149   -- ALGLIB --
13150      Copyright 2005-2008 by Bochkanov Sergey
13151 *************************************************************************/
spdmatrixcholeskydet(const real_2d_array & a,const ae_int_t n,const xparams _xparams)13152 double spdmatrixcholeskydet(const real_2d_array &a, const ae_int_t n, const xparams _xparams)
13153 {
13154     jmp_buf _break_jump;
13155     alglib_impl::ae_state _alglib_env_state;
13156     alglib_impl::ae_state_init(&_alglib_env_state);
13157     if( setjmp(_break_jump) )
13158     {
13159 #if !defined(AE_NO_EXCEPTIONS)
13160         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13161 #else
13162         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13163         return 0;
13164 #endif
13165     }
13166     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13167     if( _xparams.flags!=0x0 )
13168         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13169     double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13170     alglib_impl::ae_state_clear(&_alglib_env_state);
13171     return *(reinterpret_cast<double*>(&result));
13172 }
13173 
13174 /*************************************************************************
13175 Determinant calculation of the matrix given by the Cholesky decomposition.
13176 
13177 Input parameters:
13178     A       -   Cholesky decomposition,
13179                 output of SMatrixCholesky subroutine.
13180     N       -   (optional) size of matrix A:
13181                 * if given, only principal NxN submatrix is processed and
13182                   overwritten. other elements are unchanged.
13183                 * if not given, automatically determined from matrix size
13184                   (A must be square matrix)
13185 
13186 As the determinant is equal to the product of squares of diagonal elements,
13187 it's not necessary to specify which triangle - lower or upper - the matrix
13188 is stored in.
13189 
13190 Result:
13191     matrix determinant.
13192 
13193   -- ALGLIB --
13194      Copyright 2005-2008 by Bochkanov Sergey
13195 *************************************************************************/
13196 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixcholeskydet(const real_2d_array & a,const xparams _xparams)13197 double spdmatrixcholeskydet(const real_2d_array &a, const xparams _xparams)
13198 {
13199     jmp_buf _break_jump;
13200     alglib_impl::ae_state _alglib_env_state;
13201     ae_int_t n;
13202     if( (a.rows()!=a.cols()))
13203         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixcholeskydet': looks like one of arguments has wrong size");
13204     n = a.rows();
13205     alglib_impl::ae_state_init(&_alglib_env_state);
13206     if( setjmp(_break_jump) )
13207         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13208     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13209     if( _xparams.flags!=0x0 )
13210         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13211     double result = alglib_impl::spdmatrixcholeskydet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, &_alglib_env_state);
13212 
13213     alglib_impl::ae_state_clear(&_alglib_env_state);
13214     return *(reinterpret_cast<double*>(&result));
13215 }
13216 #endif
13217 
13218 /*************************************************************************
13219 Determinant calculation of the symmetric positive definite matrix.
13220 
13221 Input parameters:
13222     A       -   matrix. Array with elements [0..N-1, 0..N-1].
13223     N       -   (optional) size of matrix A:
13224                 * if given, only principal NxN submatrix is processed and
13225                   overwritten. other elements are unchanged.
13226                 * if not given, automatically determined from matrix size
13227                   (A must be square matrix)
13228     IsUpper -   (optional) storage type:
13229                 * if True, symmetric matrix  A  is  given  by  its  upper
13230                   triangle, and the lower triangle isn't used/changed  by
13231                   function
13232                 * if False, symmetric matrix  A  is  given  by  its lower
13233                   triangle, and the upper triangle isn't used/changed  by
13234                   function
13235                 * if not given, both lower and upper  triangles  must  be
13236                   filled.
13237 
13238 Result:
13239     determinant of matrix A.
13240     If matrix A is not positive definite, exception is thrown.
13241 
13242   -- ALGLIB --
13243      Copyright 2005-2008 by Bochkanov Sergey
13244 *************************************************************************/
spdmatrixdet(const real_2d_array & a,const ae_int_t n,const bool isupper,const xparams _xparams)13245 double spdmatrixdet(const real_2d_array &a, const ae_int_t n, const bool isupper, const xparams _xparams)
13246 {
13247     jmp_buf _break_jump;
13248     alglib_impl::ae_state _alglib_env_state;
13249     alglib_impl::ae_state_init(&_alglib_env_state);
13250     if( setjmp(_break_jump) )
13251     {
13252 #if !defined(AE_NO_EXCEPTIONS)
13253         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13254 #else
13255         _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13256         return 0;
13257 #endif
13258     }
13259     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13260     if( _xparams.flags!=0x0 )
13261         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13262     double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
13263     alglib_impl::ae_state_clear(&_alglib_env_state);
13264     return *(reinterpret_cast<double*>(&result));
13265 }
13266 
13267 /*************************************************************************
13268 Determinant calculation of the symmetric positive definite matrix.
13269 
13270 Input parameters:
13271     A       -   matrix. Array with elements [0..N-1, 0..N-1].
13272     N       -   (optional) size of matrix A:
13273                 * if given, only principal NxN submatrix is processed and
13274                   overwritten. other elements are unchanged.
13275                 * if not given, automatically determined from matrix size
13276                   (A must be square matrix)
13277     IsUpper -   (optional) storage type:
13278                 * if True, symmetric matrix  A  is  given  by  its  upper
13279                   triangle, and the lower triangle isn't used/changed  by
13280                   function
13281                 * if False, symmetric matrix  A  is  given  by  its lower
13282                   triangle, and the upper triangle isn't used/changed  by
13283                   function
13284                 * if not given, both lower and upper  triangles  must  be
13285                   filled.
13286 
13287 Result:
13288     determinant of matrix A.
13289     If matrix A is not positive definite, exception is thrown.
13290 
13291   -- ALGLIB --
13292      Copyright 2005-2008 by Bochkanov Sergey
13293 *************************************************************************/
13294 #if !defined(AE_NO_EXCEPTIONS)
spdmatrixdet(const real_2d_array & a,const xparams _xparams)13295 double spdmatrixdet(const real_2d_array &a, const xparams _xparams)
13296 {
13297     jmp_buf _break_jump;
13298     alglib_impl::ae_state _alglib_env_state;
13299     ae_int_t n;
13300     bool isupper;
13301     if( (a.rows()!=a.cols()))
13302         _ALGLIB_CPP_EXCEPTION("Error while calling 'spdmatrixdet': looks like one of arguments has wrong size");
13303     if( !alglib_impl::ae_is_symmetric(const_cast<alglib_impl::ae_matrix*>(a.c_ptr())) )
13304         _ALGLIB_CPP_EXCEPTION("'a' parameter is not symmetric matrix");
13305     n = a.rows();
13306     isupper = false;
13307     alglib_impl::ae_state_init(&_alglib_env_state);
13308     if( setjmp(_break_jump) )
13309         _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13310     ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13311     if( _xparams.flags!=0x0 )
13312         ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13313     double result = alglib_impl::spdmatrixdet(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), n, isupper, &_alglib_env_state);
13314 
13315     alglib_impl::ae_state_clear(&_alglib_env_state);
13316     return *(reinterpret_cast<double*>(&result));
13317 }
13318 #endif
13319 #endif
13320 }
13321 
13322 /////////////////////////////////////////////////////////////////////////
13323 //
13324 // THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
13325 //
13326 /////////////////////////////////////////////////////////////////////////
13327 namespace alglib_impl
13328 {
13329 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
13330 static ae_int_t ablas_blas2minvendorkernelsize = 8;
13331 static void ablas_ablasinternalsplitlength(ae_int_t n,
13332      ae_int_t nb,
13333      ae_int_t* n1,
13334      ae_int_t* n2,
13335      ae_state *_state);
13336 static void ablas_cmatrixrighttrsm2(ae_int_t m,
13337      ae_int_t n,
13338      /* Complex */ ae_matrix* a,
13339      ae_int_t i1,
13340      ae_int_t j1,
13341      ae_bool isupper,
13342      ae_bool isunit,
13343      ae_int_t optype,
13344      /* Complex */ ae_matrix* x,
13345      ae_int_t i2,
13346      ae_int_t j2,
13347      ae_state *_state);
13348 static void ablas_cmatrixlefttrsm2(ae_int_t m,
13349      ae_int_t n,
13350      /* Complex */ ae_matrix* a,
13351      ae_int_t i1,
13352      ae_int_t j1,
13353      ae_bool isupper,
13354      ae_bool isunit,
13355      ae_int_t optype,
13356      /* Complex */ ae_matrix* x,
13357      ae_int_t i2,
13358      ae_int_t j2,
13359      ae_state *_state);
13360 static void ablas_rmatrixrighttrsm2(ae_int_t m,
13361      ae_int_t n,
13362      /* Real    */ ae_matrix* a,
13363      ae_int_t i1,
13364      ae_int_t j1,
13365      ae_bool isupper,
13366      ae_bool isunit,
13367      ae_int_t optype,
13368      /* Real    */ ae_matrix* x,
13369      ae_int_t i2,
13370      ae_int_t j2,
13371      ae_state *_state);
13372 static void ablas_rmatrixlefttrsm2(ae_int_t m,
13373      ae_int_t n,
13374      /* Real    */ ae_matrix* a,
13375      ae_int_t i1,
13376      ae_int_t j1,
13377      ae_bool isupper,
13378      ae_bool isunit,
13379      ae_int_t optype,
13380      /* Real    */ ae_matrix* x,
13381      ae_int_t i2,
13382      ae_int_t j2,
13383      ae_state *_state);
13384 static void ablas_cmatrixherk2(ae_int_t n,
13385      ae_int_t k,
13386      double alpha,
13387      /* Complex */ ae_matrix* a,
13388      ae_int_t ia,
13389      ae_int_t ja,
13390      ae_int_t optypea,
13391      double beta,
13392      /* Complex */ ae_matrix* c,
13393      ae_int_t ic,
13394      ae_int_t jc,
13395      ae_bool isupper,
13396      ae_state *_state);
13397 static void ablas_rmatrixsyrk2(ae_int_t n,
13398      ae_int_t k,
13399      double alpha,
13400      /* Real    */ ae_matrix* a,
13401      ae_int_t ia,
13402      ae_int_t ja,
13403      ae_int_t optypea,
13404      double beta,
13405      /* Real    */ ae_matrix* c,
13406      ae_int_t ic,
13407      ae_int_t jc,
13408      ae_bool isupper,
13409      ae_state *_state);
13410 static void ablas_cmatrixgemmrec(ae_int_t m,
13411      ae_int_t n,
13412      ae_int_t k,
13413      ae_complex alpha,
13414      /* Complex */ ae_matrix* a,
13415      ae_int_t ia,
13416      ae_int_t ja,
13417      ae_int_t optypea,
13418      /* Complex */ ae_matrix* b,
13419      ae_int_t ib,
13420      ae_int_t jb,
13421      ae_int_t optypeb,
13422      ae_complex beta,
13423      /* Complex */ ae_matrix* c,
13424      ae_int_t ic,
13425      ae_int_t jc,
13426      ae_state *_state);
13427 ae_bool _trypexec_ablas_cmatrixgemmrec(ae_int_t m,
13428     ae_int_t n,
13429     ae_int_t k,
13430     ae_complex alpha,
13431     /* Complex */ ae_matrix* a,
13432     ae_int_t ia,
13433     ae_int_t ja,
13434     ae_int_t optypea,
13435     /* Complex */ ae_matrix* b,
13436     ae_int_t ib,
13437     ae_int_t jb,
13438     ae_int_t optypeb,
13439     ae_complex beta,
13440     /* Complex */ ae_matrix* c,
13441     ae_int_t ic,
13442     ae_int_t jc, ae_state *_state);
13443 static void ablas_rmatrixgemmrec(ae_int_t m,
13444      ae_int_t n,
13445      ae_int_t k,
13446      double alpha,
13447      /* Real    */ ae_matrix* a,
13448      ae_int_t ia,
13449      ae_int_t ja,
13450      ae_int_t optypea,
13451      /* Real    */ ae_matrix* b,
13452      ae_int_t ib,
13453      ae_int_t jb,
13454      ae_int_t optypeb,
13455      double beta,
13456      /* Real    */ ae_matrix* c,
13457      ae_int_t ic,
13458      ae_int_t jc,
13459      ae_state *_state);
13460 ae_bool _trypexec_ablas_rmatrixgemmrec(ae_int_t m,
13461     ae_int_t n,
13462     ae_int_t k,
13463     double alpha,
13464     /* Real    */ ae_matrix* a,
13465     ae_int_t ia,
13466     ae_int_t ja,
13467     ae_int_t optypea,
13468     /* Real    */ ae_matrix* b,
13469     ae_int_t ib,
13470     ae_int_t jb,
13471     ae_int_t optypeb,
13472     double beta,
13473     /* Real    */ ae_matrix* c,
13474     ae_int_t ic,
13475     ae_int_t jc, ae_state *_state);
13476 
13477 
13478 #endif
13479 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
13480 static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a,
13481      ae_int_t m,
13482      ae_int_t n,
13483      /* Complex */ ae_vector* work,
13484      /* Complex */ ae_vector* t,
13485      /* Complex */ ae_vector* tau,
13486      ae_state *_state);
13487 static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a,
13488      ae_int_t m,
13489      ae_int_t n,
13490      /* Complex */ ae_vector* work,
13491      /* Complex */ ae_vector* t,
13492      /* Complex */ ae_vector* tau,
13493      ae_state *_state);
13494 static void ortfac_rmatrixblockreflector(/* Real    */ ae_matrix* a,
13495      /* Real    */ ae_vector* tau,
13496      ae_bool columnwisea,
13497      ae_int_t lengtha,
13498      ae_int_t blocksize,
13499      /* Real    */ ae_matrix* t,
13500      /* Real    */ ae_vector* work,
13501      ae_state *_state);
13502 static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a,
13503      /* Complex */ ae_vector* tau,
13504      ae_bool columnwisea,
13505      ae_int_t lengtha,
13506      ae_int_t blocksize,
13507      /* Complex */ ae_matrix* t,
13508      /* Complex */ ae_vector* work,
13509      ae_state *_state);
13510 
13511 
13512 #endif
13513 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
13514 
13515 
13516 #endif
13517 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
13518 static double sparse_desiredloadfactor = 0.66;
13519 static double sparse_maxloadfactor = 0.75;
13520 static double sparse_growfactor = 2.00;
13521 static ae_int_t sparse_additional = 10;
13522 static ae_int_t sparse_linalgswitch = 16;
13523 static ae_int_t sparse_hash(ae_int_t i,
13524      ae_int_t j,
13525      ae_int_t tabsize,
13526      ae_state *_state);
13527 
13528 
13529 #endif
13530 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
13531 static void hsschur_internalauxschur(ae_bool wantt,
13532      ae_bool wantz,
13533      ae_int_t n,
13534      ae_int_t ilo,
13535      ae_int_t ihi,
13536      /* Real    */ ae_matrix* h,
13537      /* Real    */ ae_vector* wr,
13538      /* Real    */ ae_vector* wi,
13539      ae_int_t iloz,
13540      ae_int_t ihiz,
13541      /* Real    */ ae_matrix* z,
13542      /* Real    */ ae_vector* work,
13543      /* Real    */ ae_vector* workv3,
13544      /* Real    */ ae_vector* workc1,
13545      /* Real    */ ae_vector* works1,
13546      ae_int_t* info,
13547      ae_state *_state);
13548 static void hsschur_aux2x2schur(double* a,
13549      double* b,
13550      double* c,
13551      double* d,
13552      double* rt1r,
13553      double* rt1i,
13554      double* rt2r,
13555      double* rt2i,
13556      double* cs,
13557      double* sn,
13558      ae_state *_state);
13559 static double hsschur_extschursign(double a, double b, ae_state *_state);
13560 static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state);
13561 
13562 
13563 #endif
13564 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
13565 static ae_int_t evd_stepswithintol = 2;
13566 static void evd_clearrfields(eigsubspacestate* state, ae_state *_state);
13567 static ae_bool evd_tridiagonalevd(/* Real    */ ae_vector* d,
13568      /* Real    */ ae_vector* e,
13569      ae_int_t n,
13570      ae_int_t zneeded,
13571      /* Real    */ ae_matrix* z,
13572      ae_state *_state);
13573 static void evd_tdevde2(double a,
13574      double b,
13575      double c,
13576      double* rt1,
13577      double* rt2,
13578      ae_state *_state);
13579 static void evd_tdevdev2(double a,
13580      double b,
13581      double c,
13582      double* rt1,
13583      double* rt2,
13584      double* cs1,
13585      double* sn1,
13586      ae_state *_state);
13587 static double evd_tdevdpythag(double a, double b, ae_state *_state);
13588 static double evd_tdevdextsign(double a, double b, ae_state *_state);
13589 static ae_bool evd_internalbisectioneigenvalues(/* Real    */ ae_vector* d,
13590      /* Real    */ ae_vector* e,
13591      ae_int_t n,
13592      ae_int_t irange,
13593      ae_int_t iorder,
13594      double vl,
13595      double vu,
13596      ae_int_t il,
13597      ae_int_t iu,
13598      double abstol,
13599      /* Real    */ ae_vector* w,
13600      ae_int_t* m,
13601      ae_int_t* nsplit,
13602      /* Integer */ ae_vector* iblock,
13603      /* Integer */ ae_vector* isplit,
13604      ae_int_t* errorcode,
13605      ae_state *_state);
13606 static void evd_internaldstein(ae_int_t n,
13607      /* Real    */ ae_vector* d,
13608      /* Real    */ ae_vector* e,
13609      ae_int_t m,
13610      /* Real    */ ae_vector* w,
13611      /* Integer */ ae_vector* iblock,
13612      /* Integer */ ae_vector* isplit,
13613      /* Real    */ ae_matrix* z,
13614      /* Integer */ ae_vector* ifail,
13615      ae_int_t* info,
13616      ae_state *_state);
13617 static void evd_tdininternaldlagtf(ae_int_t n,
13618      /* Real    */ ae_vector* a,
13619      double lambdav,
13620      /* Real    */ ae_vector* b,
13621      /* Real    */ ae_vector* c,
13622      double tol,
13623      /* Real    */ ae_vector* d,
13624      /* Integer */ ae_vector* iin,
13625      ae_int_t* info,
13626      ae_state *_state);
13627 static void evd_tdininternaldlagts(ae_int_t n,
13628      /* Real    */ ae_vector* a,
13629      /* Real    */ ae_vector* b,
13630      /* Real    */ ae_vector* c,
13631      /* Real    */ ae_vector* d,
13632      /* Integer */ ae_vector* iin,
13633      /* Real    */ ae_vector* y,
13634      double* tol,
13635      ae_int_t* info,
13636      ae_state *_state);
13637 static void evd_internaldlaebz(ae_int_t ijob,
13638      ae_int_t nitmax,
13639      ae_int_t n,
13640      ae_int_t mmax,
13641      ae_int_t minp,
13642      double abstol,
13643      double reltol,
13644      double pivmin,
13645      /* Real    */ ae_vector* d,
13646      /* Real    */ ae_vector* e,
13647      /* Real    */ ae_vector* e2,
13648      /* Integer */ ae_vector* nval,
13649      /* Real    */ ae_matrix* ab,
13650      /* Real    */ ae_vector* c,
13651      ae_int_t* mout,
13652      /* Integer */ ae_matrix* nab,
13653      /* Real    */ ae_vector* work,
13654      /* Integer */ ae_vector* iwork,
13655      ae_int_t* info,
13656      ae_state *_state);
13657 static void evd_rmatrixinternaltrevc(/* Real    */ ae_matrix* t,
13658      ae_int_t n,
13659      ae_int_t side,
13660      ae_int_t howmny,
13661      /* Boolean */ ae_vector* vselect,
13662      /* Real    */ ae_matrix* vl,
13663      /* Real    */ ae_matrix* vr,
13664      ae_int_t* m,
13665      ae_int_t* info,
13666      ae_state *_state);
13667 static void evd_internaltrevc(/* Real    */ ae_matrix* t,
13668      ae_int_t n,
13669      ae_int_t side,
13670      ae_int_t howmny,
13671      /* Boolean */ ae_vector* vselect,
13672      /* Real    */ ae_matrix* vl,
13673      /* Real    */ ae_matrix* vr,
13674      ae_int_t* m,
13675      ae_int_t* info,
13676      ae_state *_state);
13677 static void evd_internalhsevdlaln2(ae_bool ltrans,
13678      ae_int_t na,
13679      ae_int_t nw,
13680      double smin,
13681      double ca,
13682      /* Real    */ ae_matrix* a,
13683      double d1,
13684      double d2,
13685      /* Real    */ ae_matrix* b,
13686      double wr,
13687      double wi,
13688      /* Boolean */ ae_vector* rswap4,
13689      /* Boolean */ ae_vector* zswap4,
13690      /* Integer */ ae_matrix* ipivot44,
13691      /* Real    */ ae_vector* civ4,
13692      /* Real    */ ae_vector* crv4,
13693      /* Real    */ ae_matrix* x,
13694      double* scl,
13695      double* xnorm,
13696      ae_int_t* info,
13697      ae_state *_state);
13698 static void evd_internalhsevdladiv(double a,
13699      double b,
13700      double c,
13701      double d,
13702      double* p,
13703      double* q,
13704      ae_state *_state);
13705 
13706 
13707 #endif
13708 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
13709 static void dlu_cmatrixlup2(/* Complex */ ae_matrix* a,
13710      ae_int_t offs,
13711      ae_int_t m,
13712      ae_int_t n,
13713      /* Integer */ ae_vector* pivots,
13714      /* Complex */ ae_vector* tmp,
13715      ae_state *_state);
13716 static void dlu_rmatrixlup2(/* Real    */ ae_matrix* a,
13717      ae_int_t offs,
13718      ae_int_t m,
13719      ae_int_t n,
13720      /* Integer */ ae_vector* pivots,
13721      /* Real    */ ae_vector* tmp,
13722      ae_state *_state);
13723 static void dlu_cmatrixplu2(/* Complex */ ae_matrix* a,
13724      ae_int_t offs,
13725      ae_int_t m,
13726      ae_int_t n,
13727      /* Integer */ ae_vector* pivots,
13728      /* Complex */ ae_vector* tmp,
13729      ae_state *_state);
13730 static void dlu_rmatrixplu2(/* Real    */ ae_matrix* a,
13731      ae_int_t offs,
13732      ae_int_t m,
13733      ae_int_t n,
13734      /* Integer */ ae_vector* pivots,
13735      /* Real    */ ae_vector* tmp,
13736      ae_state *_state);
13737 
13738 
13739 #endif
13740 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
13741 static double sptrf_densebnd = 0.10;
13742 static ae_int_t sptrf_slswidth = 8;
13743 static void sptrf_sluv2list1init(ae_int_t n,
13744      sluv2list1matrix* a,
13745      ae_state *_state);
13746 static void sptrf_sluv2list1swap(sluv2list1matrix* a,
13747      ae_int_t i,
13748      ae_int_t j,
13749      ae_state *_state);
13750 static void sptrf_sluv2list1dropsequence(sluv2list1matrix* a,
13751      ae_int_t i,
13752      ae_state *_state);
13753 static void sptrf_sluv2list1appendsequencetomatrix(sluv2list1matrix* a,
13754      ae_int_t src,
13755      ae_bool hasdiagonal,
13756      double d,
13757      ae_int_t nzmax,
13758      sparsematrix* s,
13759      ae_int_t dst,
13760      ae_state *_state);
13761 static void sptrf_sluv2list1pushsparsevector(sluv2list1matrix* a,
13762      /* Integer */ ae_vector* si,
13763      /* Real    */ ae_vector* sv,
13764      ae_int_t nz,
13765      ae_state *_state);
13766 static void sptrf_densetrailinit(sluv2densetrail* d,
13767      ae_int_t n,
13768      ae_state *_state);
13769 static void sptrf_densetrailappendcolumn(sluv2densetrail* d,
13770      /* Real    */ ae_vector* x,
13771      ae_int_t id,
13772      ae_state *_state);
13773 static void sptrf_sparsetrailinit(sparsematrix* s,
13774      sluv2sparsetrail* a,
13775      ae_state *_state);
13776 static ae_bool sptrf_sparsetrailfindpivot(sluv2sparsetrail* a,
13777      ae_int_t pivottype,
13778      ae_int_t* ipiv,
13779      ae_int_t* jpiv,
13780      ae_state *_state);
13781 static void sptrf_sparsetrailpivotout(sluv2sparsetrail* a,
13782      ae_int_t ipiv,
13783      ae_int_t jpiv,
13784      double* uu,
13785      /* Integer */ ae_vector* v0i,
13786      /* Real    */ ae_vector* v0r,
13787      ae_int_t* nz0,
13788      /* Integer */ ae_vector* v1i,
13789      /* Real    */ ae_vector* v1r,
13790      ae_int_t* nz1,
13791      ae_state *_state);
13792 static void sptrf_sparsetraildensify(sluv2sparsetrail* a,
13793      ae_int_t i1,
13794      sluv2list1matrix* bupper,
13795      sluv2densetrail* dtrail,
13796      ae_state *_state);
13797 static void sptrf_sparsetrailupdate(sluv2sparsetrail* a,
13798      /* Integer */ ae_vector* v0i,
13799      /* Real    */ ae_vector* v0r,
13800      ae_int_t nz0,
13801      /* Integer */ ae_vector* v1i,
13802      /* Real    */ ae_vector* v1r,
13803      ae_int_t nz1,
13804      sluv2list1matrix* bupper,
13805      sluv2densetrail* dtrail,
13806      ae_bool densificationsupported,
13807      ae_state *_state);
13808 
13809 
13810 #endif
13811 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
13812 static ae_int_t amdordering_knsheadersize = 2;
13813 static ae_int_t amdordering_llmentrysize = 6;
13814 static void amdordering_nsinitemptyslow(ae_int_t n,
13815      amdnset* sa,
13816      ae_state *_state);
13817 static void amdordering_nscopy(amdnset* ssrc,
13818      amdnset* sdst,
13819      ae_state *_state);
13820 static void amdordering_nsaddelement(amdnset* sa,
13821      ae_int_t k,
13822      ae_state *_state);
13823 static void amdordering_nsaddkth(amdnset* sa,
13824      amdknset* src,
13825      ae_int_t k,
13826      ae_state *_state);
13827 static void amdordering_nssubtract1(amdnset* sa,
13828      amdnset* src,
13829      ae_state *_state);
13830 static void amdordering_nssubtractkth(amdnset* sa,
13831      amdknset* src,
13832      ae_int_t k,
13833      ae_state *_state);
13834 static void amdordering_nsclear(amdnset* sa, ae_state *_state);
13835 static ae_int_t amdordering_nscount(amdnset* sa, ae_state *_state);
13836 static ae_int_t amdordering_nscountnotkth(amdnset* sa,
13837      amdknset* src,
13838      ae_int_t k,
13839      ae_state *_state);
13840 static ae_int_t amdordering_nscountandkth(amdnset* sa,
13841      amdknset* src,
13842      ae_int_t k,
13843      ae_state *_state);
13844 static ae_bool amdordering_nsequal(amdnset* s0,
13845      amdnset* s1,
13846      ae_state *_state);
13847 static void amdordering_nsstartenumeration(amdnset* sa, ae_state *_state);
13848 static ae_bool amdordering_nsenumerate(amdnset* sa,
13849      ae_int_t* i,
13850      ae_state *_state);
13851 static void amdordering_knscompressstorage(amdknset* sa, ae_state *_state);
13852 static void amdordering_knsreallocate(amdknset* sa,
13853      ae_int_t setidx,
13854      ae_int_t newallocated,
13855      ae_state *_state);
13856 static void amdordering_knsinit(ae_int_t k,
13857      ae_int_t n,
13858      ae_int_t kprealloc,
13859      amdknset* sa,
13860      ae_state *_state);
13861 static void amdordering_knsinitfroma(sparsematrix* a,
13862      ae_int_t n,
13863      amdknset* sa,
13864      ae_state *_state);
13865 static void amdordering_knsstartenumeration(amdknset* sa,
13866      ae_int_t i,
13867      ae_state *_state);
13868 static ae_bool amdordering_knsenumerate(amdknset* sa,
13869      ae_int_t* i,
13870      ae_state *_state);
13871 static void amdordering_knsdirectaccess(amdknset* sa,
13872      ae_int_t k,
13873      ae_int_t* idxbegin,
13874      ae_int_t* idxend,
13875      ae_state *_state);
13876 static void amdordering_knsaddnewelement(amdknset* sa,
13877      ae_int_t i,
13878      ae_int_t k,
13879      ae_state *_state);
13880 static void amdordering_knssubtract1(amdknset* sa,
13881      ae_int_t i,
13882      amdnset* src,
13883      ae_state *_state);
13884 static void amdordering_knsaddkthdistinct(amdknset* sa,
13885      ae_int_t i,
13886      amdknset* src,
13887      ae_int_t k,
13888      ae_state *_state);
13889 static ae_int_t amdordering_knscountkth(amdknset* s0,
13890      ae_int_t k,
13891      ae_state *_state);
13892 static ae_int_t amdordering_knscountnot(amdknset* s0,
13893      ae_int_t i,
13894      amdnset* s1,
13895      ae_state *_state);
13896 static ae_int_t amdordering_knscountnotkth(amdknset* s0,
13897      ae_int_t i,
13898      amdknset* s1,
13899      ae_int_t k,
13900      ae_state *_state);
13901 static ae_int_t amdordering_knscountandkth(amdknset* s0,
13902      ae_int_t i,
13903      amdknset* s1,
13904      ae_int_t k,
13905      ae_state *_state);
13906 static ae_int_t amdordering_knssumkth(amdknset* s0,
13907      ae_int_t i,
13908      ae_state *_state);
13909 static void amdordering_knsclearkthnoreclaim(amdknset* sa,
13910      ae_int_t k,
13911      ae_state *_state);
13912 static void amdordering_knsclearkthreclaim(amdknset* sa,
13913      ae_int_t k,
13914      ae_state *_state);
13915 static void amdordering_mtxinit(ae_int_t n,
13916      amdllmatrix* a,
13917      ae_state *_state);
13918 static void amdordering_mtxaddcolumnto(amdllmatrix* a,
13919      ae_int_t j,
13920      amdnset* s,
13921      ae_state *_state);
13922 static void amdordering_mtxinsertnewelement(amdllmatrix* a,
13923      ae_int_t i,
13924      ae_int_t j,
13925      ae_state *_state);
13926 static ae_int_t amdordering_mtxcountcolumnnot(amdllmatrix* a,
13927      ae_int_t j,
13928      amdnset* s,
13929      ae_state *_state);
13930 static ae_int_t amdordering_mtxcountcolumn(amdllmatrix* a,
13931      ae_int_t j,
13932      ae_state *_state);
13933 static void amdordering_mtxclearx(amdllmatrix* a,
13934      ae_int_t k,
13935      ae_bool iscol,
13936      ae_state *_state);
13937 static void amdordering_mtxclearcolumn(amdllmatrix* a,
13938      ae_int_t j,
13939      ae_state *_state);
13940 static void amdordering_mtxclearrow(amdllmatrix* a,
13941      ae_int_t j,
13942      ae_state *_state);
13943 static void amdordering_vtxinit(sparsematrix* a,
13944      ae_int_t n,
13945      ae_bool checkexactdegrees,
13946      amdvertexset* s,
13947      ae_state *_state);
13948 static void amdordering_vtxremovevertex(amdvertexset* s,
13949      ae_int_t p,
13950      ae_state *_state);
13951 static ae_int_t amdordering_vtxgetapprox(amdvertexset* s,
13952      ae_int_t p,
13953      ae_state *_state);
13954 static ae_int_t amdordering_vtxgetexact(amdvertexset* s,
13955      ae_int_t p,
13956      ae_state *_state);
13957 static ae_int_t amdordering_vtxgetapproxmindegree(amdvertexset* s,
13958      ae_state *_state);
13959 static void amdordering_vtxupdateapproximatedegree(amdvertexset* s,
13960      ae_int_t p,
13961      ae_int_t dnew,
13962      ae_state *_state);
13963 static void amdordering_vtxupdateexactdegree(amdvertexset* s,
13964      ae_int_t p,
13965      ae_int_t d,
13966      ae_state *_state);
13967 static void amdordering_amdselectpivotelement(amdbuffer* buf,
13968      ae_int_t k,
13969      ae_int_t* p,
13970      ae_int_t* nodesize,
13971      ae_state *_state);
13972 static void amdordering_amdcomputelp(amdbuffer* buf,
13973      ae_int_t p,
13974      ae_state *_state);
13975 static void amdordering_amdmasselimination(amdbuffer* buf,
13976      ae_int_t p,
13977      ae_int_t k,
13978      ae_int_t tau,
13979      ae_state *_state);
13980 static void amdordering_amddetectsupernodes(amdbuffer* buf,
13981      ae_state *_state);
13982 
13983 
13984 #endif
13985 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
13986 static ae_int_t spchol_maxsupernode = 4;
13987 static double spchol_maxmergeinefficiency = 0.25;
13988 static ae_int_t spchol_smallfakestolerance = 2;
13989 static ae_int_t spchol_maxfastkernel = 4;
13990 static ae_bool spchol_relaxedsupernodes = ae_true;
13991 #ifdef ALGLIB_NO_FAST_KERNELS
13992 static ae_int_t spchol_spsymmgetmaxsimd(ae_state *_state);
13993 #endif
13994 #ifdef ALGLIB_NO_FAST_KERNELS
13995 static void spchol_propagatefwd(/* Real    */ ae_vector* x,
13996      ae_int_t cols0,
13997      ae_int_t blocksize,
13998      /* Integer */ ae_vector* superrowidx,
13999      ae_int_t rbase,
14000      ae_int_t offdiagsize,
14001      /* Real    */ ae_vector* rowstorage,
14002      ae_int_t offss,
14003      ae_int_t sstride,
14004      /* Real    */ ae_vector* simdbuf,
14005      ae_int_t simdwidth,
14006      ae_state *_state);
14007 #endif
14008 static void spchol_generatedbgpermutation(sparsematrix* a,
14009      ae_int_t n,
14010      /* Integer */ ae_vector* perm,
14011      /* Integer */ ae_vector* invperm,
14012      ae_state *_state);
14013 static void spchol_buildunorderedetree(sparsematrix* a,
14014      ae_int_t n,
14015      /* Integer */ ae_vector* parent,
14016      /* Integer */ ae_vector* tabove,
14017      ae_state *_state);
14018 static void spchol_fromparenttochildren(/* Integer */ ae_vector* parent,
14019      ae_int_t n,
14020      /* Integer */ ae_vector* childrenr,
14021      /* Integer */ ae_vector* childreni,
14022      /* Integer */ ae_vector* ttmp0,
14023      ae_state *_state);
14024 static void spchol_buildorderedetree(sparsematrix* a,
14025      ae_int_t n,
14026      /* Integer */ ae_vector* parent,
14027      /* Integer */ ae_vector* supernodalpermutation,
14028      /* Integer */ ae_vector* invsupernodalpermutation,
14029      /* Integer */ ae_vector* trawparentofrawnode,
14030      /* Integer */ ae_vector* trawparentofreorderednode,
14031      /* Integer */ ae_vector* ttmp,
14032      /* Boolean */ ae_vector* tflagarray,
14033      ae_state *_state);
14034 static void spchol_createsupernodalstructure(sparsematrix* at,
14035      /* Integer */ ae_vector* parent,
14036      ae_int_t n,
14037      spcholanalysis* analysis,
14038      /* Integer */ ae_vector* node2supernode,
14039      /* Integer */ ae_vector* tchildrenr,
14040      /* Integer */ ae_vector* tchildreni,
14041      /* Integer */ ae_vector* tparentnodeofsupernode,
14042      /* Integer */ ae_vector* tfakenonzeros,
14043      /* Integer */ ae_vector* ttmp0,
14044      /* Boolean */ ae_vector* tflagarray,
14045      ae_state *_state);
14046 static void spchol_analyzesupernodaldependencies(spcholanalysis* analysis,
14047      sparsematrix* rawa,
14048      /* Integer */ ae_vector* node2supernode,
14049      ae_int_t n,
14050      /* Integer */ ae_vector* ttmp0,
14051      /* Integer */ ae_vector* ttmp1,
14052      /* Boolean */ ae_vector* tflagarray,
14053      ae_state *_state);
14054 static void spchol_loadmatrix(spcholanalysis* analysis,
14055      sparsematrix* at,
14056      ae_state *_state);
14057 static void spchol_extractmatrix(spcholanalysis* analysis,
14058      /* Integer */ ae_vector* offsets,
14059      /* Integer */ ae_vector* strides,
14060      /* Real    */ ae_vector* rowstorage,
14061      /* Real    */ ae_vector* diagd,
14062      ae_int_t n,
14063      sparsematrix* a,
14064      /* Real    */ ae_vector* d,
14065      /* Integer */ ae_vector* p,
14066      /* Integer */ ae_vector* tmpp,
14067      ae_state *_state);
14068 static void spchol_partialcholeskypattern(sparsematrix* a,
14069      ae_int_t head,
14070      ae_int_t tail,
14071      sparsematrix* atail,
14072      /* Integer */ ae_vector* tmpparent,
14073      /* Integer */ ae_vector* tmpchildrenr,
14074      /* Integer */ ae_vector* tmpchildreni,
14075      /* Integer */ ae_vector* tmp1,
14076      /* Boolean */ ae_vector* flagarray,
14077      sparsematrix* tmpbottomt,
14078      sparsematrix* tmpupdatet,
14079      sparsematrix* tmpupdate,
14080      sparsematrix* tmpnewtailt,
14081      ae_state *_state);
14082 static void spchol_topologicalpermutation(sparsematrix* a,
14083      /* Integer */ ae_vector* p,
14084      sparsematrix* b,
14085      ae_state *_state);
14086 static ae_int_t spchol_computenonzeropattern(sparsematrix* wrkat,
14087      ae_int_t columnidx,
14088      ae_int_t n,
14089      /* Integer */ ae_vector* superrowridx,
14090      /* Integer */ ae_vector* superrowidx,
14091      ae_int_t nsuper,
14092      /* Integer */ ae_vector* childrennodesr,
14093      /* Integer */ ae_vector* childrennodesi,
14094      /* Integer */ ae_vector* node2supernode,
14095      /* Boolean */ ae_vector* truearray,
14096      /* Integer */ ae_vector* tmp0,
14097      ae_state *_state);
14098 static ae_int_t spchol_updatesupernode(spcholanalysis* analysis,
14099      ae_int_t sidx,
14100      ae_int_t cols0,
14101      ae_int_t cols1,
14102      ae_int_t offss,
14103      /* Integer */ ae_vector* raw2smap,
14104      ae_int_t uidx,
14105      ae_int_t wrkrow,
14106      /* Real    */ ae_vector* diagd,
14107      ae_int_t offsd,
14108      ae_state *_state);
14109 static ae_bool spchol_factorizesupernode(spcholanalysis* analysis,
14110      ae_int_t sidx,
14111      ae_state *_state);
14112 static ae_int_t spchol_recommendedstridefor(ae_int_t rowsize,
14113      ae_state *_state);
14114 static ae_int_t spchol_alignpositioninarray(ae_int_t offs,
14115      ae_state *_state);
14116 #ifdef ALGLIB_NO_FAST_KERNELS
14117 static ae_bool spchol_updatekernel4444(/* Real    */ ae_vector* rowstorage,
14118      ae_int_t offss,
14119      ae_int_t sheight,
14120      ae_int_t offsu,
14121      ae_int_t uheight,
14122      /* Real    */ ae_vector* diagd,
14123      ae_int_t offsd,
14124      /* Integer */ ae_vector* raw2smap,
14125      /* Integer */ ae_vector* superrowidx,
14126      ae_int_t urbase,
14127      ae_state *_state);
14128 #endif
14129 #ifdef ALGLIB_NO_FAST_KERNELS
14130 static ae_bool spchol_updatekernelabc4(/* Real    */ ae_vector* rowstorage,
14131      ae_int_t offss,
14132      ae_int_t twidth,
14133      ae_int_t offsu,
14134      ae_int_t uheight,
14135      ae_int_t urank,
14136      ae_int_t urowstride,
14137      ae_int_t uwidth,
14138      /* Real    */ ae_vector* diagd,
14139      ae_int_t offsd,
14140      /* Integer */ ae_vector* raw2smap,
14141      /* Integer */ ae_vector* superrowidx,
14142      ae_int_t urbase,
14143      ae_state *_state);
14144 #endif
14145 static ae_bool spchol_updatekernelrank1(/* Real    */ ae_vector* rowstorage,
14146      ae_int_t offss,
14147      ae_int_t twidth,
14148      ae_int_t trowstride,
14149      ae_int_t offsu,
14150      ae_int_t uheight,
14151      ae_int_t uwidth,
14152      /* Real    */ ae_vector* diagd,
14153      ae_int_t offsd,
14154      /* Integer */ ae_vector* raw2smap,
14155      /* Integer */ ae_vector* superrowidx,
14156      ae_int_t urbase,
14157      ae_state *_state);
14158 static ae_bool spchol_updatekernelrank2(/* Real    */ ae_vector* rowstorage,
14159      ae_int_t offss,
14160      ae_int_t twidth,
14161      ae_int_t trowstride,
14162      ae_int_t offsu,
14163      ae_int_t uheight,
14164      ae_int_t uwidth,
14165      /* Real    */ ae_vector* diagd,
14166      ae_int_t offsd,
14167      /* Integer */ ae_vector* raw2smap,
14168      /* Integer */ ae_vector* superrowidx,
14169      ae_int_t urbase,
14170      ae_state *_state);
14171 static void spchol_slowdebugchecks(sparsematrix* a,
14172      /* Integer */ ae_vector* fillinperm,
14173      ae_int_t n,
14174      ae_int_t tail,
14175      sparsematrix* referencetaila,
14176      ae_state *_state);
14177 static ae_bool spchol_dbgmatrixcholesky2(/* Real    */ ae_matrix* aaa,
14178      ae_int_t offs,
14179      ae_int_t n,
14180      ae_bool isupper,
14181      ae_state *_state);
14182 
14183 
14184 #endif
14185 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
14186 static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a,
14187      ae_int_t offs,
14188      ae_int_t n,
14189      ae_bool isupper,
14190      /* Complex */ ae_vector* tmp,
14191      ae_state *_state);
14192 static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa,
14193      ae_int_t offs,
14194      ae_int_t n,
14195      ae_bool isupper,
14196      /* Complex */ ae_vector* tmp,
14197      ae_state *_state);
14198 static ae_bool trfac_spdmatrixcholesky2(/* Real    */ ae_matrix* aaa,
14199      ae_int_t offs,
14200      ae_int_t n,
14201      ae_bool isupper,
14202      /* Real    */ ae_vector* tmp,
14203      ae_state *_state);
14204 
14205 
14206 #endif
14207 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
14208 static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real    */ ae_vector* d,
14209      /* Real    */ ae_vector* e,
14210      ae_int_t n,
14211      ae_bool isupper,
14212      ae_bool isfractionalaccuracyrequired,
14213      /* Real    */ ae_matrix* uu,
14214      ae_int_t ustart,
14215      ae_int_t nru,
14216      /* Real    */ ae_matrix* c,
14217      ae_int_t cstart,
14218      ae_int_t ncc,
14219      /* Real    */ ae_matrix* vt,
14220      ae_int_t vstart,
14221      ae_int_t ncvt,
14222      ae_state *_state);
14223 static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state);
14224 static void bdsvd_svd2x2(double f,
14225      double g,
14226      double h,
14227      double* ssmin,
14228      double* ssmax,
14229      ae_state *_state);
14230 static void bdsvd_svdv2x2(double f,
14231      double g,
14232      double h,
14233      double* ssmin,
14234      double* ssmax,
14235      double* snr,
14236      double* csr,
14237      double* snl,
14238      double* csl,
14239      ae_state *_state);
14240 
14241 
14242 #endif
14243 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
14244 
14245 
14246 #endif
14247 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
14248 static void rcond_rmatrixrcondtrinternal(/* Real    */ ae_matrix* a,
14249      ae_int_t n,
14250      ae_bool isupper,
14251      ae_bool isunit,
14252      ae_bool onenorm,
14253      double anorm,
14254      double* rc,
14255      ae_state *_state);
14256 static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a,
14257      ae_int_t n,
14258      ae_bool isupper,
14259      ae_bool isunit,
14260      ae_bool onenorm,
14261      double anorm,
14262      double* rc,
14263      ae_state *_state);
14264 static void rcond_spdmatrixrcondcholeskyinternal(/* Real    */ ae_matrix* cha,
14265      ae_int_t n,
14266      ae_bool isupper,
14267      ae_bool isnormprovided,
14268      double anorm,
14269      double* rc,
14270      ae_state *_state);
14271 static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha,
14272      ae_int_t n,
14273      ae_bool isupper,
14274      ae_bool isnormprovided,
14275      double anorm,
14276      double* rc,
14277      ae_state *_state);
14278 static void rcond_rmatrixrcondluinternal(/* Real    */ ae_matrix* lua,
14279      ae_int_t n,
14280      ae_bool onenorm,
14281      ae_bool isanormprovided,
14282      double anorm,
14283      double* rc,
14284      ae_state *_state);
14285 static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua,
14286      ae_int_t n,
14287      ae_bool onenorm,
14288      ae_bool isanormprovided,
14289      double anorm,
14290      double* rc,
14291      ae_state *_state);
14292 static void rcond_rmatrixestimatenorm(ae_int_t n,
14293      /* Real    */ ae_vector* v,
14294      /* Real    */ ae_vector* x,
14295      /* Integer */ ae_vector* isgn,
14296      double* est,
14297      ae_int_t* kase,
14298      ae_state *_state);
14299 static void rcond_cmatrixestimatenorm(ae_int_t n,
14300      /* Complex */ ae_vector* v,
14301      /* Complex */ ae_vector* x,
14302      double* est,
14303      ae_int_t* kase,
14304      /* Integer */ ae_vector* isave,
14305      /* Real    */ ae_vector* rsave,
14306      ae_state *_state);
14307 static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x,
14308      ae_int_t n,
14309      ae_state *_state);
14310 static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x,
14311      ae_int_t n,
14312      ae_state *_state);
14313 static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave,
14314      /* Real    */ ae_vector* rsave,
14315      ae_int_t* i,
14316      ae_int_t* iter,
14317      ae_int_t* j,
14318      ae_int_t* jlast,
14319      ae_int_t* jump,
14320      double* absxi,
14321      double* altsgn,
14322      double* estold,
14323      double* temp,
14324      ae_state *_state);
14325 static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave,
14326      /* Real    */ ae_vector* rsave,
14327      ae_int_t* i,
14328      ae_int_t* iter,
14329      ae_int_t* j,
14330      ae_int_t* jlast,
14331      ae_int_t* jump,
14332      double* absxi,
14333      double* altsgn,
14334      double* estold,
14335      double* temp,
14336      ae_state *_state);
14337 
14338 
14339 #endif
14340 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
14341 
14342 
14343 #endif
14344 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
14345 
14346 
14347 #endif
14348 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
14349 static void matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
14350      ae_int_t offs,
14351      ae_int_t n,
14352      ae_bool isupper,
14353      ae_bool isunit,
14354      /* Real    */ ae_vector* tmp,
14355      sinteger* info,
14356      ae_state *_state);
14357 ae_bool _trypexec_matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
14358     ae_int_t offs,
14359     ae_int_t n,
14360     ae_bool isupper,
14361     ae_bool isunit,
14362     /* Real    */ ae_vector* tmp,
14363     sinteger* info, ae_state *_state);
14364 static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
14365      ae_int_t offs,
14366      ae_int_t n,
14367      ae_bool isupper,
14368      ae_bool isunit,
14369      /* Complex */ ae_vector* tmp,
14370      sinteger* info,
14371      ae_state *_state);
14372 ae_bool _trypexec_matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
14373     ae_int_t offs,
14374     ae_int_t n,
14375     ae_bool isupper,
14376     ae_bool isunit,
14377     /* Complex */ ae_vector* tmp,
14378     sinteger* info, ae_state *_state);
14379 static void matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
14380      ae_int_t offs,
14381      ae_int_t n,
14382      /* Real    */ ae_vector* work,
14383      sinteger* info,
14384      matinvreport* rep,
14385      ae_state *_state);
14386 ae_bool _trypexec_matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
14387     ae_int_t offs,
14388     ae_int_t n,
14389     /* Real    */ ae_vector* work,
14390     sinteger* info,
14391     matinvreport* rep, ae_state *_state);
14392 static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
14393      ae_int_t offs,
14394      ae_int_t n,
14395      /* Complex */ ae_vector* work,
14396      sinteger* ssinfo,
14397      matinvreport* rep,
14398      ae_state *_state);
14399 ae_bool _trypexec_matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
14400     ae_int_t offs,
14401     ae_int_t n,
14402     /* Complex */ ae_vector* work,
14403     sinteger* ssinfo,
14404     matinvreport* rep, ae_state *_state);
14405 static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a,
14406      ae_int_t offs,
14407      ae_int_t n,
14408      ae_bool isupper,
14409      /* Complex */ ae_vector* tmp,
14410      ae_state *_state);
14411 
14412 
14413 #endif
14414 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
14415 
14416 
14417 #endif
14418 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
14419 
14420 
14421 #endif
14422 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
14423 
14424 
14425 #endif
14426 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
14427 
14428 
14429 #endif
14430 
14431 #if defined(AE_COMPILE_ABLAS) || !defined(AE_PARTIAL_BUILD)
14432 
14433 
14434 /*************************************************************************
14435 Splits matrix length in two parts, left part should match ABLAS block size
14436 
14437 INPUT PARAMETERS
14438     A   -   real matrix, is passed to ensure that we didn't split
14439             complex matrix using real splitting subroutine.
14440             matrix itself is not changed.
14441     N   -   length, N>0
14442 
14443 OUTPUT PARAMETERS
14444     N1  -   length
14445     N2  -   length
14446 
14447 N1+N2=N, N1>=N2, N2 may be zero
14448 
14449   -- ALGLIB routine --
14450      15.12.2009
14451      Bochkanov Sergey
14452 *************************************************************************/
ablassplitlength(ae_matrix * a,ae_int_t n,ae_int_t * n1,ae_int_t * n2,ae_state * _state)14453 void ablassplitlength(/* Real    */ ae_matrix* a,
14454      ae_int_t n,
14455      ae_int_t* n1,
14456      ae_int_t* n2,
14457      ae_state *_state)
14458 {
14459 
14460     *n1 = 0;
14461     *n2 = 0;
14462 
14463     if( n>ablasblocksize(a, _state) )
14464     {
14465         ablas_ablasinternalsplitlength(n, ablasblocksize(a, _state), n1, n2, _state);
14466     }
14467     else
14468     {
14469         ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state);
14470     }
14471 }
14472 
14473 
14474 /*************************************************************************
14475 Complex ABLASSplitLength
14476 
14477   -- ALGLIB routine --
14478      15.12.2009
14479      Bochkanov Sergey
14480 *************************************************************************/
ablascomplexsplitlength(ae_matrix * a,ae_int_t n,ae_int_t * n1,ae_int_t * n2,ae_state * _state)14481 void ablascomplexsplitlength(/* Complex */ ae_matrix* a,
14482      ae_int_t n,
14483      ae_int_t* n1,
14484      ae_int_t* n2,
14485      ae_state *_state)
14486 {
14487 
14488     *n1 = 0;
14489     *n2 = 0;
14490 
14491     if( n>ablascomplexblocksize(a, _state) )
14492     {
14493         ablas_ablasinternalsplitlength(n, ablascomplexblocksize(a, _state), n1, n2, _state);
14494     }
14495     else
14496     {
14497         ablas_ablasinternalsplitlength(n, ablasmicroblocksize(_state), n1, n2, _state);
14498     }
14499 }
14500 
14501 
14502 /*************************************************************************
14503 Returns switch point for parallelism.
14504 
14505   -- ALGLIB routine --
14506      15.12.2009
14507      Bochkanov Sergey
14508 *************************************************************************/
gemmparallelsize(ae_state * _state)14509 ae_int_t gemmparallelsize(ae_state *_state)
14510 {
14511     ae_int_t result;
14512 
14513 
14514     result = 64;
14515     return result;
14516 }
14517 
14518 
14519 /*************************************************************************
14520 Returns block size - subdivision size where  cache-oblivious  soubroutines
14521 switch to the optimized kernel.
14522 
14523 INPUT PARAMETERS
14524     A   -   real matrix, is passed to ensure that we didn't split
14525             complex matrix using real splitting subroutine.
14526             matrix itself is not changed.
14527 
14528   -- ALGLIB routine --
14529      15.12.2009
14530      Bochkanov Sergey
14531 *************************************************************************/
ablasblocksize(ae_matrix * a,ae_state * _state)14532 ae_int_t ablasblocksize(/* Real    */ ae_matrix* a, ae_state *_state)
14533 {
14534     ae_int_t result;
14535 
14536 
14537     result = 32;
14538     return result;
14539 }
14540 
14541 
14542 /*************************************************************************
14543 Block size for complex subroutines.
14544 
14545   -- ALGLIB routine --
14546      15.12.2009
14547      Bochkanov Sergey
14548 *************************************************************************/
ablascomplexblocksize(ae_matrix * a,ae_state * _state)14549 ae_int_t ablascomplexblocksize(/* Complex */ ae_matrix* a,
14550      ae_state *_state)
14551 {
14552     ae_int_t result;
14553 
14554 
14555     result = 24;
14556     return result;
14557 }
14558 
14559 
14560 /*************************************************************************
14561 Microblock size
14562 
14563   -- ALGLIB routine --
14564      15.12.2009
14565      Bochkanov Sergey
14566 *************************************************************************/
ablasmicroblocksize(ae_state * _state)14567 ae_int_t ablasmicroblocksize(ae_state *_state)
14568 {
14569     ae_int_t result;
14570 
14571 
14572     result = 8;
14573     return result;
14574 }
14575 
14576 
14577 /*************************************************************************
14578 Generation of an elementary reflection transformation
14579 
14580 The subroutine generates elementary reflection H of order N, so that, for
14581 a given X, the following equality holds true:
14582 
14583     ( X(1) )   ( Beta )
14584 H * (  ..  ) = (  0   )
14585     ( X(n) )   (  0   )
14586 
14587 where
14588               ( V(1) )
14589 H = 1 - Tau * (  ..  ) * ( V(1), ..., V(n) )
14590               ( V(n) )
14591 
14592 where the first component of vector V equals 1.
14593 
14594 Input parameters:
14595     X   -   vector. Array whose index ranges within [1..N].
14596     N   -   reflection order.
14597 
14598 Output parameters:
14599     X   -   components from 2 to N are replaced with vector V.
14600             The first component is replaced with parameter Beta.
14601     Tau -   scalar value Tau. If X is a null vector, Tau equals 0,
14602             otherwise 1 <= Tau <= 2.
14603 
14604 This subroutine is the modification of the DLARFG subroutines from
14605 the LAPACK library.
14606 
14607 MODIFICATIONS:
14608     24.12.2005 sign(Alpha) was replaced with an analogous to the Fortran SIGN code.
14609 
14610   -- LAPACK auxiliary routine (version 3.0) --
14611      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
14612      Courant Institute, Argonne National Lab, and Rice University
14613      September 30, 1994
14614 *************************************************************************/
generatereflection(ae_vector * x,ae_int_t n,double * tau,ae_state * _state)14615 void generatereflection(/* Real    */ ae_vector* x,
14616      ae_int_t n,
14617      double* tau,
14618      ae_state *_state)
14619 {
14620     ae_int_t j;
14621     double alpha;
14622     double xnorm;
14623     double v;
14624     double beta;
14625     double mx;
14626     double s;
14627 
14628     *tau = 0;
14629 
14630     if( n<=1 )
14631     {
14632         *tau = (double)(0);
14633         return;
14634     }
14635 
14636     /*
14637      * Scale if needed (to avoid overflow/underflow during intermediate
14638      * calculations).
14639      */
14640     mx = (double)(0);
14641     for(j=1; j<=n; j++)
14642     {
14643         mx = ae_maxreal(ae_fabs(x->ptr.p_double[j], _state), mx, _state);
14644     }
14645     s = (double)(1);
14646     if( ae_fp_neq(mx,(double)(0)) )
14647     {
14648         if( ae_fp_less_eq(mx,ae_minrealnumber/ae_machineepsilon) )
14649         {
14650             s = ae_minrealnumber/ae_machineepsilon;
14651             v = 1/s;
14652             ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
14653             mx = mx*v;
14654         }
14655         else
14656         {
14657             if( ae_fp_greater_eq(mx,ae_maxrealnumber*ae_machineepsilon) )
14658             {
14659                 s = ae_maxrealnumber*ae_machineepsilon;
14660                 v = 1/s;
14661                 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
14662                 mx = mx*v;
14663             }
14664         }
14665     }
14666 
14667     /*
14668      * XNORM = DNRM2( N-1, X, INCX )
14669      */
14670     alpha = x->ptr.p_double[1];
14671     xnorm = (double)(0);
14672     if( ae_fp_neq(mx,(double)(0)) )
14673     {
14674         for(j=2; j<=n; j++)
14675         {
14676             xnorm = xnorm+ae_sqr(x->ptr.p_double[j]/mx, _state);
14677         }
14678         xnorm = ae_sqrt(xnorm, _state)*mx;
14679     }
14680     if( ae_fp_eq(xnorm,(double)(0)) )
14681     {
14682 
14683         /*
14684          * H  =  I
14685          */
14686         *tau = (double)(0);
14687         x->ptr.p_double[1] = x->ptr.p_double[1]*s;
14688         return;
14689     }
14690 
14691     /*
14692      * general case
14693      */
14694     mx = ae_maxreal(ae_fabs(alpha, _state), ae_fabs(xnorm, _state), _state);
14695     beta = -mx*ae_sqrt(ae_sqr(alpha/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
14696     if( ae_fp_less(alpha,(double)(0)) )
14697     {
14698         beta = -beta;
14699     }
14700     *tau = (beta-alpha)/beta;
14701     v = 1/(alpha-beta);
14702     ae_v_muld(&x->ptr.p_double[2], 1, ae_v_len(2,n), v);
14703     x->ptr.p_double[1] = beta;
14704 
14705     /*
14706      * Scale back outputs
14707      */
14708     x->ptr.p_double[1] = x->ptr.p_double[1]*s;
14709 }
14710 
14711 
14712 /*************************************************************************
14713 Application of an elementary reflection to a rectangular matrix of size MxN
14714 
14715 The algorithm pre-multiplies the matrix by an elementary reflection transformation
14716 which is given by column V and scalar Tau (see the description of the
14717 GenerateReflection procedure). Not the whole matrix but only a part of it
14718 is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements
14719 of this submatrix are changed.
14720 
14721 Input parameters:
14722     C       -   matrix to be transformed.
14723     Tau     -   scalar defining the transformation.
14724     V       -   column defining the transformation.
14725                 Array whose index ranges within [1..M2-M1+1].
14726     M1, M2  -   range of rows to be transformed.
14727     N1, N2  -   range of columns to be transformed.
14728     WORK    -   working array whose indexes goes from N1 to N2.
14729 
14730 Output parameters:
14731     C       -   the result of multiplying the input matrix C by the
14732                 transformation matrix which is given by Tau and V.
14733                 If N1>N2 or M1>M2, C is not modified.
14734 
14735   -- LAPACK auxiliary routine (version 3.0) --
14736      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
14737      Courant Institute, Argonne National Lab, and Rice University
14738      September 30, 1994
14739 *************************************************************************/
applyreflectionfromtheleft(ae_matrix * c,double tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)14740 void applyreflectionfromtheleft(/* Real    */ ae_matrix* c,
14741      double tau,
14742      /* Real    */ ae_vector* v,
14743      ae_int_t m1,
14744      ae_int_t m2,
14745      ae_int_t n1,
14746      ae_int_t n2,
14747      /* Real    */ ae_vector* work,
14748      ae_state *_state)
14749 {
14750 
14751 
14752     if( (ae_fp_eq(tau,(double)(0))||n1>n2)||m1>m2 )
14753     {
14754         return;
14755     }
14756     rvectorsetlengthatleast(work, n2-n1+1, _state);
14757     rmatrixgemv(n2-n1+1, m2-m1+1, 1.0, c, m1, n1, 1, v, 1, 0.0, work, 0, _state);
14758     rmatrixger(m2-m1+1, n2-n1+1, c, m1, n1, -tau, v, 1, work, 0, _state);
14759 }
14760 
14761 
14762 /*************************************************************************
14763 Application of an elementary reflection to a rectangular matrix of size MxN
14764 
14765 The algorithm post-multiplies the matrix by an elementary reflection transformation
14766 which is given by column V and scalar Tau (see the description of the
14767 GenerateReflection procedure). Not the whole matrix but only a part of it
14768 is transformed (rows from M1 to M2, columns from N1 to N2). Only the
14769 elements of this submatrix are changed.
14770 
14771 Input parameters:
14772     C       -   matrix to be transformed.
14773     Tau     -   scalar defining the transformation.
14774     V       -   column defining the transformation.
14775                 Array whose index ranges within [1..N2-N1+1].
14776     M1, M2  -   range of rows to be transformed.
14777     N1, N2  -   range of columns to be transformed.
14778     WORK    -   working array whose indexes goes from M1 to M2.
14779 
14780 Output parameters:
14781     C       -   the result of multiplying the input matrix C by the
14782                 transformation matrix which is given by Tau and V.
14783                 If N1>N2 or M1>M2, C is not modified.
14784 
14785   -- LAPACK auxiliary routine (version 3.0) --
14786      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
14787      Courant Institute, Argonne National Lab, and Rice University
14788      September 30, 1994
14789 *************************************************************************/
applyreflectionfromtheright(ae_matrix * c,double tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)14790 void applyreflectionfromtheright(/* Real    */ ae_matrix* c,
14791      double tau,
14792      /* Real    */ ae_vector* v,
14793      ae_int_t m1,
14794      ae_int_t m2,
14795      ae_int_t n1,
14796      ae_int_t n2,
14797      /* Real    */ ae_vector* work,
14798      ae_state *_state)
14799 {
14800 
14801 
14802     if( (ae_fp_eq(tau,(double)(0))||n1>n2)||m1>m2 )
14803     {
14804         return;
14805     }
14806     rvectorsetlengthatleast(work, m2-m1+1, _state);
14807     rmatrixgemv(m2-m1+1, n2-n1+1, 1.0, c, m1, n1, 0, v, 1, 0.0, work, 0, _state);
14808     rmatrixger(m2-m1+1, n2-n1+1, c, m1, n1, -tau, work, 0, v, 1, _state);
14809 }
14810 
14811 
14812 /*************************************************************************
14813 Cache-oblivious complex "copy-and-transpose"
14814 
14815 Input parameters:
14816     M   -   number of rows
14817     N   -   number of columns
14818     A   -   source matrix, MxN submatrix is copied and transposed
14819     IA  -   submatrix offset (row index)
14820     JA  -   submatrix offset (column index)
14821     B   -   destination matrix, must be large enough to store result
14822     IB  -   submatrix offset (row index)
14823     JB  -   submatrix offset (column index)
14824 *************************************************************************/
cmatrixtranspose(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)14825 void cmatrixtranspose(ae_int_t m,
14826      ae_int_t n,
14827      /* Complex */ ae_matrix* a,
14828      ae_int_t ia,
14829      ae_int_t ja,
14830      /* Complex */ ae_matrix* b,
14831      ae_int_t ib,
14832      ae_int_t jb,
14833      ae_state *_state)
14834 {
14835     ae_int_t i;
14836     ae_int_t s1;
14837     ae_int_t s2;
14838 
14839 
14840     if( m<=2*ablascomplexblocksize(a, _state)&&n<=2*ablascomplexblocksize(a, _state) )
14841     {
14842 
14843         /*
14844          * base case
14845          */
14846         for(i=0; i<=m-1; i++)
14847         {
14848             ae_v_cmove(&b->ptr.pp_complex[ib][jb+i], b->stride, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(ib,ib+n-1));
14849         }
14850     }
14851     else
14852     {
14853 
14854         /*
14855          * Cache-oblivious recursion
14856          */
14857         if( m>n )
14858         {
14859             ablascomplexsplitlength(a, m, &s1, &s2, _state);
14860             cmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state);
14861             cmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state);
14862         }
14863         else
14864         {
14865             ablascomplexsplitlength(a, n, &s1, &s2, _state);
14866             cmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state);
14867             cmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state);
14868         }
14869     }
14870 }
14871 
14872 
14873 /*************************************************************************
14874 Cache-oblivious real "copy-and-transpose"
14875 
14876 Input parameters:
14877     M   -   number of rows
14878     N   -   number of columns
14879     A   -   source matrix, MxN submatrix is copied and transposed
14880     IA  -   submatrix offset (row index)
14881     JA  -   submatrix offset (column index)
14882     B   -   destination matrix, must be large enough to store result
14883     IB  -   submatrix offset (row index)
14884     JB  -   submatrix offset (column index)
14885 *************************************************************************/
rmatrixtranspose(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)14886 void rmatrixtranspose(ae_int_t m,
14887      ae_int_t n,
14888      /* Real    */ ae_matrix* a,
14889      ae_int_t ia,
14890      ae_int_t ja,
14891      /* Real    */ ae_matrix* b,
14892      ae_int_t ib,
14893      ae_int_t jb,
14894      ae_state *_state)
14895 {
14896     ae_int_t i;
14897     ae_int_t s1;
14898     ae_int_t s2;
14899 
14900 
14901     if( m<=2*ablasblocksize(a, _state)&&n<=2*ablasblocksize(a, _state) )
14902     {
14903 
14904         /*
14905          * base case
14906          */
14907         for(i=0; i<=m-1; i++)
14908         {
14909             ae_v_move(&b->ptr.pp_double[ib][jb+i], b->stride, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(ib,ib+n-1));
14910         }
14911     }
14912     else
14913     {
14914 
14915         /*
14916          * Cache-oblivious recursion
14917          */
14918         if( m>n )
14919         {
14920             ablassplitlength(a, m, &s1, &s2, _state);
14921             rmatrixtranspose(s1, n, a, ia, ja, b, ib, jb, _state);
14922             rmatrixtranspose(s2, n, a, ia+s1, ja, b, ib, jb+s1, _state);
14923         }
14924         else
14925         {
14926             ablassplitlength(a, n, &s1, &s2, _state);
14927             rmatrixtranspose(m, s1, a, ia, ja, b, ib, jb, _state);
14928             rmatrixtranspose(m, s2, a, ia, ja+s1, b, ib+s1, jb, _state);
14929         }
14930     }
14931 }
14932 
14933 
14934 /*************************************************************************
14935 This code enforces symmetry of the matrix by copying the Upper part to the lower
14936 one (or vice versa).
14937 
14938 INPUT PARAMETERS:
14939     A   -   matrix
14940     N   -   number of rows/columns
14941     IsUpper - whether we want to copy upper triangle to lower one (True)
14942             or vice versa (False).
14943 *************************************************************************/
rmatrixenforcesymmetricity(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)14944 void rmatrixenforcesymmetricity(/* Real    */ ae_matrix* a,
14945      ae_int_t n,
14946      ae_bool isupper,
14947      ae_state *_state)
14948 {
14949     ae_int_t i;
14950     ae_int_t j;
14951 
14952 
14953     if( isupper )
14954     {
14955         for(i=0; i<=n-1; i++)
14956         {
14957             for(j=i+1; j<=n-1; j++)
14958             {
14959                 a->ptr.pp_double[j][i] = a->ptr.pp_double[i][j];
14960             }
14961         }
14962     }
14963     else
14964     {
14965         for(i=0; i<=n-1; i++)
14966         {
14967             for(j=i+1; j<=n-1; j++)
14968             {
14969                 a->ptr.pp_double[i][j] = a->ptr.pp_double[j][i];
14970             }
14971         }
14972     }
14973 }
14974 
14975 
14976 /*************************************************************************
14977 Copy
14978 
14979 Input parameters:
14980     M   -   number of rows
14981     N   -   number of columns
14982     A   -   source matrix, MxN submatrix is copied
14983     IA  -   submatrix offset (row index)
14984     JA  -   submatrix offset (column index)
14985     B   -   destination matrix, must be large enough to store result
14986     IB  -   submatrix offset (row index)
14987     JB  -   submatrix offset (column index)
14988 *************************************************************************/
cmatrixcopy(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)14989 void cmatrixcopy(ae_int_t m,
14990      ae_int_t n,
14991      /* Complex */ ae_matrix* a,
14992      ae_int_t ia,
14993      ae_int_t ja,
14994      /* Complex */ ae_matrix* b,
14995      ae_int_t ib,
14996      ae_int_t jb,
14997      ae_state *_state)
14998 {
14999     ae_int_t i;
15000 
15001 
15002     if( m==0||n==0 )
15003     {
15004         return;
15005     }
15006     for(i=0; i<=m-1; i++)
15007     {
15008         ae_v_cmove(&b->ptr.pp_complex[ib+i][jb], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(jb,jb+n-1));
15009     }
15010 }
15011 
15012 
15013 /*************************************************************************
15014 Copy
15015 
15016 Input parameters:
15017     N   -   subvector size
15018     A   -   source vector, N elements are copied
15019     IA  -   source offset (first element index)
15020     B   -   destination vector, must be large enough to store result
15021     IB  -   destination offset (first element index)
15022 *************************************************************************/
rvectorcopy(ae_int_t n,ae_vector * a,ae_int_t ia,ae_vector * b,ae_int_t ib,ae_state * _state)15023 void rvectorcopy(ae_int_t n,
15024      /* Real    */ ae_vector* a,
15025      ae_int_t ia,
15026      /* Real    */ ae_vector* b,
15027      ae_int_t ib,
15028      ae_state *_state)
15029 {
15030 
15031 
15032     if( n==0 )
15033     {
15034         return;
15035     }
15036     if( ia==0&&ib==0 )
15037     {
15038         rcopyv(n, a, b, _state);
15039     }
15040     else
15041     {
15042         rcopyvx(n, a, ia, b, ib, _state);
15043     }
15044 }
15045 
15046 
15047 /*************************************************************************
15048 Copy
15049 
15050 Input parameters:
15051     M   -   number of rows
15052     N   -   number of columns
15053     A   -   source matrix, MxN submatrix is copied
15054     IA  -   submatrix offset (row index)
15055     JA  -   submatrix offset (column index)
15056     B   -   destination matrix, must be large enough to store result
15057     IB  -   submatrix offset (row index)
15058     JB  -   submatrix offset (column index)
15059 *************************************************************************/
rmatrixcopy(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)15060 void rmatrixcopy(ae_int_t m,
15061      ae_int_t n,
15062      /* Real    */ ae_matrix* a,
15063      ae_int_t ia,
15064      ae_int_t ja,
15065      /* Real    */ ae_matrix* b,
15066      ae_int_t ib,
15067      ae_int_t jb,
15068      ae_state *_state)
15069 {
15070     ae_int_t i;
15071 
15072 
15073     if( m==0||n==0 )
15074     {
15075         return;
15076     }
15077     for(i=0; i<=m-1; i++)
15078     {
15079         ae_v_move(&b->ptr.pp_double[ib+i][jb], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(jb,jb+n-1));
15080     }
15081 }
15082 
15083 
15084 /*************************************************************************
15085 Performs generalized copy: B := Beta*B + Alpha*A.
15086 
15087 If Beta=0, then previous contents of B is simply ignored. If Alpha=0, then
15088 A is ignored and not referenced. If both Alpha and Beta  are  zero,  B  is
15089 filled by zeros.
15090 
15091 Input parameters:
15092     M   -   number of rows
15093     N   -   number of columns
15094     Alpha-  coefficient
15095     A   -   source matrix, MxN submatrix is read
15096     IA  -   submatrix offset (row index)
15097     JA  -   submatrix offset (column index)
15098     Beta-   coefficient
15099     B   -   destination matrix, must be large enough to store result
15100     IB  -   submatrix offset (row index)
15101     JB  -   submatrix offset (column index)
15102 *************************************************************************/
rmatrixgencopy(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,double beta,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_state * _state)15103 void rmatrixgencopy(ae_int_t m,
15104      ae_int_t n,
15105      double alpha,
15106      /* Real    */ ae_matrix* a,
15107      ae_int_t ia,
15108      ae_int_t ja,
15109      double beta,
15110      /* Real    */ ae_matrix* b,
15111      ae_int_t ib,
15112      ae_int_t jb,
15113      ae_state *_state)
15114 {
15115     ae_int_t i;
15116     ae_int_t j;
15117 
15118 
15119     if( m==0||n==0 )
15120     {
15121         return;
15122     }
15123 
15124     /*
15125      * Zero-fill
15126      */
15127     if( ae_fp_eq(alpha,(double)(0))&&ae_fp_eq(beta,(double)(0)) )
15128     {
15129         for(i=0; i<=m-1; i++)
15130         {
15131             for(j=0; j<=n-1; j++)
15132             {
15133                 b->ptr.pp_double[ib+i][jb+j] = (double)(0);
15134             }
15135         }
15136         return;
15137     }
15138 
15139     /*
15140      * Inplace multiply
15141      */
15142     if( ae_fp_eq(alpha,(double)(0)) )
15143     {
15144         for(i=0; i<=m-1; i++)
15145         {
15146             for(j=0; j<=n-1; j++)
15147             {
15148                 b->ptr.pp_double[ib+i][jb+j] = beta*b->ptr.pp_double[ib+i][jb+j];
15149             }
15150         }
15151         return;
15152     }
15153 
15154     /*
15155      * Multiply and copy
15156      */
15157     if( ae_fp_eq(beta,(double)(0)) )
15158     {
15159         for(i=0; i<=m-1; i++)
15160         {
15161             for(j=0; j<=n-1; j++)
15162             {
15163                 b->ptr.pp_double[ib+i][jb+j] = alpha*a->ptr.pp_double[ia+i][ja+j];
15164             }
15165         }
15166         return;
15167     }
15168 
15169     /*
15170      * Generic
15171      */
15172     for(i=0; i<=m-1; i++)
15173     {
15174         for(j=0; j<=n-1; j++)
15175         {
15176             b->ptr.pp_double[ib+i][jb+j] = alpha*a->ptr.pp_double[ia+i][ja+j]+beta*b->ptr.pp_double[ib+i][jb+j];
15177         }
15178     }
15179 }
15180 
15181 
15182 /*************************************************************************
15183 Rank-1 correction: A := A + alpha*u*v'
15184 
15185 NOTE: this  function  expects  A  to  be  large enough to store result. No
15186       automatic preallocation happens for  smaller  arrays.  No  integrity
15187       checks is performed for sizes of A, u, v.
15188 
15189 INPUT PARAMETERS:
15190     M   -   number of rows
15191     N   -   number of columns
15192     A   -   target matrix, MxN submatrix is updated
15193     IA  -   submatrix offset (row index)
15194     JA  -   submatrix offset (column index)
15195     Alpha-  coefficient
15196     U   -   vector #1
15197     IU  -   subvector offset
15198     V   -   vector #2
15199     IV  -   subvector offset
15200 
15201 
15202   -- ALGLIB routine --
15203 
15204      16.10.2017
15205      Bochkanov Sergey
15206 *************************************************************************/
rmatrixger(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double alpha,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)15207 void rmatrixger(ae_int_t m,
15208      ae_int_t n,
15209      /* Real    */ ae_matrix* a,
15210      ae_int_t ia,
15211      ae_int_t ja,
15212      double alpha,
15213      /* Real    */ ae_vector* u,
15214      ae_int_t iu,
15215      /* Real    */ ae_vector* v,
15216      ae_int_t iv,
15217      ae_state *_state)
15218 {
15219     ae_int_t i;
15220     double s;
15221 
15222 
15223 
15224     /*
15225      * Quick exit
15226      */
15227     if( m<=0||n<=0 )
15228     {
15229         return;
15230     }
15231 
15232     /*
15233      * Try fast kernels:
15234      * * vendor kernel
15235      * * internal kernel
15236      */
15237     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15238     {
15239 
15240         /*
15241          * Try MKL kernel first
15242          */
15243         if( rmatrixgermkl(m, n, a, ia, ja, alpha, u, iu, v, iv, _state) )
15244         {
15245             return;
15246         }
15247     }
15248     if( rmatrixgerf(m, n, a, ia, ja, alpha, u, iu, v, iv, _state) )
15249     {
15250         return;
15251     }
15252 
15253     /*
15254      * Generic code
15255      */
15256     for(i=0; i<=m-1; i++)
15257     {
15258         s = alpha*u->ptr.p_double[iu+i];
15259         ae_v_addd(&a->ptr.pp_double[ia+i][ja], 1, &v->ptr.p_double[iv], 1, ae_v_len(ja,ja+n-1), s);
15260     }
15261 }
15262 
15263 
15264 /*************************************************************************
15265 Rank-1 correction: A := A + u*v'
15266 
15267 INPUT PARAMETERS:
15268     M   -   number of rows
15269     N   -   number of columns
15270     A   -   target matrix, MxN submatrix is updated
15271     IA  -   submatrix offset (row index)
15272     JA  -   submatrix offset (column index)
15273     U   -   vector #1
15274     IU  -   subvector offset
15275     V   -   vector #2
15276     IV  -   subvector offset
15277 *************************************************************************/
cmatrixrank1(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)15278 void cmatrixrank1(ae_int_t m,
15279      ae_int_t n,
15280      /* Complex */ ae_matrix* a,
15281      ae_int_t ia,
15282      ae_int_t ja,
15283      /* Complex */ ae_vector* u,
15284      ae_int_t iu,
15285      /* Complex */ ae_vector* v,
15286      ae_int_t iv,
15287      ae_state *_state)
15288 {
15289     ae_int_t i;
15290     ae_complex s;
15291 
15292 
15293 
15294     /*
15295      * Quick exit
15296      */
15297     if( m<=0||n<=0 )
15298     {
15299         return;
15300     }
15301 
15302     /*
15303      * Try fast kernels:
15304      * * vendor kernel
15305      * * internal kernel
15306      */
15307     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15308     {
15309 
15310         /*
15311          * Try MKL kernel first
15312          */
15313         if( cmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv, _state) )
15314         {
15315             return;
15316         }
15317     }
15318     if( cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) )
15319     {
15320         return;
15321     }
15322 
15323     /*
15324      * Generic code
15325      */
15326     for(i=0; i<=m-1; i++)
15327     {
15328         s = u->ptr.p_complex[iu+i];
15329         ae_v_caddc(&a->ptr.pp_complex[ia+i][ja], 1, &v->ptr.p_complex[iv], 1, "N", ae_v_len(ja,ja+n-1), s);
15330     }
15331 }
15332 
15333 
15334 /*************************************************************************
15335 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGER()
15336            which is more generic version of this function.
15337 
15338 Rank-1 correction: A := A + u*v'
15339 
15340 INPUT PARAMETERS:
15341     M   -   number of rows
15342     N   -   number of columns
15343     A   -   target matrix, MxN submatrix is updated
15344     IA  -   submatrix offset (row index)
15345     JA  -   submatrix offset (column index)
15346     U   -   vector #1
15347     IU  -   subvector offset
15348     V   -   vector #2
15349     IV  -   subvector offset
15350 *************************************************************************/
rmatrixrank1(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)15351 void rmatrixrank1(ae_int_t m,
15352      ae_int_t n,
15353      /* Real    */ ae_matrix* a,
15354      ae_int_t ia,
15355      ae_int_t ja,
15356      /* Real    */ ae_vector* u,
15357      ae_int_t iu,
15358      /* Real    */ ae_vector* v,
15359      ae_int_t iv,
15360      ae_state *_state)
15361 {
15362     ae_int_t i;
15363     double s;
15364 
15365 
15366 
15367     /*
15368      * Quick exit
15369      */
15370     if( m<=0||n<=0 )
15371     {
15372         return;
15373     }
15374 
15375     /*
15376      * Try fast kernels:
15377      * * vendor kernel
15378      * * internal kernel
15379      */
15380     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15381     {
15382 
15383         /*
15384          * Try MKL kernel first
15385          */
15386         if( rmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv, _state) )
15387         {
15388             return;
15389         }
15390     }
15391     if( rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv, _state) )
15392     {
15393         return;
15394     }
15395 
15396     /*
15397      * Generic code
15398      */
15399     for(i=0; i<=m-1; i++)
15400     {
15401         s = u->ptr.p_double[iu+i];
15402         ae_v_addd(&a->ptr.pp_double[ia+i][ja], 1, &v->ptr.p_double[iv], 1, ae_v_len(ja,ja+n-1), s);
15403     }
15404 }
15405 
15406 
/*************************************************************************
Matrix-vector product: y := Alpha*op(A)*x + Beta*y

Operates on the MxN submatrix of A with offset (IA,JA), the subvector of
X starting at IX and the subvector of Y starting at IY.

Special cases handled explicitly below:
* M<=0              - nothing is done
* N<=0 or Alpha=0   - y := Beta*y (or y := 0 when Beta=0); A and X are
                      not referenced

NOTE(review): this header was reconstructed from the visible code; OpA is
forwarded unchanged to the RGEMV/RGEMVX kernels (presumably 0 => op(A)=A
and 1 => op(A)=A^T, as in the sibling routine RMatrixMV) - confirm
against the official ALGLIB documentation.
*************************************************************************/
void rmatrixgemv(ae_int_t m,
     ae_int_t n,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t opa,
     /* Real    */ ae_vector* x,
     ae_int_t ix,
     double beta,
     /* Real    */ ae_vector* y,
     ae_int_t iy,
     ae_state *_state)
{


    /*
     * Quick exit for M=0, N=0 or Alpha=0.
     *
     * After this block we have M>0, N>0, Alpha<>0.
     */
    if( m<=0 )
    {
        return;
    }
    if( n<=0||ae_fp_eq(alpha,0.0) )
    {
        if( ae_fp_neq(beta,(double)(0)) )
        {
            rmulvx(m, beta, y, iy, _state);
        }
        else
        {
            rsetvx(m, 0.0, y, iy, _state);
        }
        return;
    }

    /*
     * Try fast kernels
     */
    if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
    {

        /*
         * Try MKL kernel
         */
        if( rmatrixgemvmkl(m, n, alpha, a, ia, ja, opa, x, ix, beta, y, iy, _state) )
        {
            return;
        }
    }

    /*
     * All-zero offsets allow the simpler offset-free kernel;
     * otherwise use the offset-aware one.
     */
    if( ia+ja+ix+iy==0 )
    {
        rgemv(m, n, alpha, a, opa, x, beta, y, _state);
    }
    else
    {
        rgemvx(m, n, alpha, a, ia, ja, opa, x, ix, beta, y, iy, _state);
    }
}
15469 
15470 
15471 /*************************************************************************
15472 Matrix-vector product: y := op(A)*x
15473 
15474 INPUT PARAMETERS:
15475     M   -   number of rows of op(A)
15476             M>=0
15477     N   -   number of columns of op(A)
15478             N>=0
15479     A   -   target matrix
15480     IA  -   submatrix offset (row index)
15481     JA  -   submatrix offset (column index)
15482     OpA -   operation type:
15483             * OpA=0     =>  op(A) = A
15484             * OpA=1     =>  op(A) = A^T
15485             * OpA=2     =>  op(A) = A^H
15486     X   -   input vector
15487     IX  -   subvector offset
15488     IY  -   subvector offset
15489     Y   -   preallocated vector, must be large enough to store result
15490 
15491 OUTPUT PARAMETERS:
15492     Y   -   vector which stores result
15493 
15494 if M=0, then subroutine does nothing.
15495 if N=0, Y is filled by zeros.
15496 
15497 
15498   -- ALGLIB routine --
15499 
15500      28.01.2010
15501      Bochkanov Sergey
15502 *************************************************************************/
cmatrixmv(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)15503 void cmatrixmv(ae_int_t m,
15504      ae_int_t n,
15505      /* Complex */ ae_matrix* a,
15506      ae_int_t ia,
15507      ae_int_t ja,
15508      ae_int_t opa,
15509      /* Complex */ ae_vector* x,
15510      ae_int_t ix,
15511      /* Complex */ ae_vector* y,
15512      ae_int_t iy,
15513      ae_state *_state)
15514 {
15515     ae_int_t i;
15516     ae_complex v;
15517 
15518 
15519 
15520     /*
15521      * Quick exit
15522      */
15523     if( m==0 )
15524     {
15525         return;
15526     }
15527     if( n==0 )
15528     {
15529         for(i=0; i<=m-1; i++)
15530         {
15531             y->ptr.p_complex[iy+i] = ae_complex_from_i(0);
15532         }
15533         return;
15534     }
15535 
15536     /*
15537      * Try fast kernels
15538      */
15539     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15540     {
15541 
15542         /*
15543          * Try MKL kernel
15544          */
15545         if( cmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy, _state) )
15546         {
15547             return;
15548         }
15549     }
15550 
15551     /*
15552      * Generic code
15553      */
15554     if( opa==0 )
15555     {
15556 
15557         /*
15558          * y = A*x
15559          */
15560         for(i=0; i<=m-1; i++)
15561         {
15562             v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &x->ptr.p_complex[ix], 1, "N", ae_v_len(ja,ja+n-1));
15563             y->ptr.p_complex[iy+i] = v;
15564         }
15565         return;
15566     }
15567     if( opa==1 )
15568     {
15569 
15570         /*
15571          * y = A^T*x
15572          */
15573         for(i=0; i<=m-1; i++)
15574         {
15575             y->ptr.p_complex[iy+i] = ae_complex_from_i(0);
15576         }
15577         for(i=0; i<=n-1; i++)
15578         {
15579             v = x->ptr.p_complex[ix+i];
15580             ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "N", ae_v_len(iy,iy+m-1), v);
15581         }
15582         return;
15583     }
15584     if( opa==2 )
15585     {
15586 
15587         /*
15588          * y = A^H*x
15589          */
15590         for(i=0; i<=m-1; i++)
15591         {
15592             y->ptr.p_complex[iy+i] = ae_complex_from_i(0);
15593         }
15594         for(i=0; i<=n-1; i++)
15595         {
15596             v = x->ptr.p_complex[ix+i];
15597             ae_v_caddc(&y->ptr.p_complex[iy], 1, &a->ptr.pp_complex[ia+i][ja], 1, "Conj", ae_v_len(iy,iy+m-1), v);
15598         }
15599         return;
15600     }
15601 }
15602 
15603 
15604 /*************************************************************************
15605 IMPORTANT: this function is deprecated since ALGLIB 3.13. Use RMatrixGEMV()
15606            which is more generic version of this function.
15607 
15608 Matrix-vector product: y := op(A)*x
15609 
15610 INPUT PARAMETERS:
15611     M   -   number of rows of op(A)
15612     N   -   number of columns of op(A)
15613     A   -   target matrix
15614     IA  -   submatrix offset (row index)
15615     JA  -   submatrix offset (column index)
15616     OpA -   operation type:
15617             * OpA=0     =>  op(A) = A
15618             * OpA=1     =>  op(A) = A^T
15619     X   -   input vector
15620     IX  -   subvector offset
15621     IY  -   subvector offset
15622     Y   -   preallocated vector, must be large enough to store result
15623 
15624 OUTPUT PARAMETERS:
15625     Y   -   vector which stores result
15626 
15627 if M=0, then subroutine does nothing.
15628 if N=0, Y is filled by zeros.
15629 
15630 
15631   -- ALGLIB routine --
15632 
15633      28.01.2010
15634      Bochkanov Sergey
15635 *************************************************************************/
rmatrixmv(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)15636 void rmatrixmv(ae_int_t m,
15637      ae_int_t n,
15638      /* Real    */ ae_matrix* a,
15639      ae_int_t ia,
15640      ae_int_t ja,
15641      ae_int_t opa,
15642      /* Real    */ ae_vector* x,
15643      ae_int_t ix,
15644      /* Real    */ ae_vector* y,
15645      ae_int_t iy,
15646      ae_state *_state)
15647 {
15648     ae_int_t i;
15649     double v;
15650 
15651 
15652 
15653     /*
15654      * Quick exit
15655      */
15656     if( m==0 )
15657     {
15658         return;
15659     }
15660     if( n==0 )
15661     {
15662         for(i=0; i<=m-1; i++)
15663         {
15664             y->ptr.p_double[iy+i] = (double)(0);
15665         }
15666         return;
15667     }
15668 
15669     /*
15670      * Try fast kernels
15671      */
15672     if( m>ablas_blas2minvendorkernelsize&&n>ablas_blas2minvendorkernelsize )
15673     {
15674 
15675         /*
15676          * Try MKL kernel
15677          */
15678         if( rmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy, _state) )
15679         {
15680             return;
15681         }
15682     }
15683 
15684     /*
15685      * Generic code
15686      */
15687     if( opa==0 )
15688     {
15689 
15690         /*
15691          * y = A*x
15692          */
15693         for(i=0; i<=m-1; i++)
15694         {
15695             v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &x->ptr.p_double[ix], 1, ae_v_len(ja,ja+n-1));
15696             y->ptr.p_double[iy+i] = v;
15697         }
15698         return;
15699     }
15700     if( opa==1 )
15701     {
15702 
15703         /*
15704          * y = A^T*x
15705          */
15706         for(i=0; i<=m-1; i++)
15707         {
15708             y->ptr.p_double[iy+i] = (double)(0);
15709         }
15710         for(i=0; i<=n-1; i++)
15711         {
15712             v = x->ptr.p_double[ix+i];
15713             ae_v_addd(&y->ptr.p_double[iy], 1, &a->ptr.pp_double[ia+i][ja], 1, ae_v_len(iy,iy+m-1), v);
15714         }
15715         return;
15716     }
15717 }
15718 
15719 
/*************************************************************************
Symmetric matrix-vector product: y := Alpha*A*x + Beta*y, where A  is  an
NxN symmetric matrix given by one of its triangles  (the  code below reads
only the IsUpper-selected triangle of the NxN block at offset (IA,JA) and
mirrors it to the other one during accumulation).

Special cases handled explicitly below:
* N<=0      - nothing is done
* Alpha=0   - y := Beta*y (or y := 0 when Beta=0); A and X are not
              referenced

NOTE(review): this header was reconstructed from the visible code -
confirm parameter conventions against the official ALGLIB documentation.
*************************************************************************/
void rmatrixsymv(ae_int_t n,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_bool isupper,
     /* Real    */ ae_vector* x,
     ae_int_t ix,
     double beta,
     /* Real    */ ae_vector* y,
     ae_int_t iy,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    double vr;
    double vx;



    /*
     * Quick exit for N<=0 or Alpha=0.
     *
     * After this block we have N>0, Alpha<>0.
     */
    if( n<=0 )
    {
        return;
    }
    if( ae_fp_eq(alpha,0.0) )
    {
        if( ae_fp_neq(beta,(double)(0)) )
        {
            for(i=0; i<=n-1; i++)
            {
                y->ptr.p_double[iy+i] = beta*y->ptr.p_double[iy+i];
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                y->ptr.p_double[iy+i] = 0.0;
            }
        }
        return;
    }

    /*
     * Try fast kernels
     */
    if( n>ablas_blas2minvendorkernelsize )
    {

        /*
         * Try MKL kernel
         */
        if( rmatrixsymvmkl(n, alpha, a, ia, ja, isupper, x, ix, beta, y, iy, _state) )
        {
            return;
        }
    }

    /*
     * Generic code: pre-scale Y by Beta (or clear it), then accumulate
     */
    if( ae_fp_neq(beta,(double)(0)) )
    {
        for(i=0; i<=n-1; i++)
        {
            y->ptr.p_double[iy+i] = beta*y->ptr.p_double[iy+i];
        }
    }
    else
    {
        for(i=0; i<=n-1; i++)
        {
            y->ptr.p_double[iy+i] = 0.0;
        }
    }
    if( isupper )
    {

        /*
         * Upper triangle of A is stored
         */
        for(i=0; i<=n-1; i++)
        {

            /*
             * Process diagonal element
             */
            v = alpha*a->ptr.pp_double[ia+i][ja+i];
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+v*x->ptr.p_double[ix+i];

            /*
             * Process off-diagonal elements: each stored A[i][j] (j>i)
             * contributes both to Y[j] (as A[j][i] by symmetry) and,
             * via VR, to Y[i]
             */
            vr = 0.0;
            vx = x->ptr.p_double[ix+i];
            for(j=i+1; j<=n-1; j++)
            {
                v = alpha*a->ptr.pp_double[ia+i][ja+j];
                y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*vx;
                vr = vr+v*x->ptr.p_double[ix+j];
            }
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+vr;
        }
    }
    else
    {

        /*
         * Lower triangle of A is stored
         */
        for(i=0; i<=n-1; i++)
        {

            /*
             * Process diagonal element
             */
            v = alpha*a->ptr.pp_double[ia+i][ja+i];
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+v*x->ptr.p_double[ix+i];

            /*
             * Process off-diagonal elements: each stored A[i][j] (j<i)
             * contributes both to Y[j] (as A[j][i] by symmetry) and,
             * via VR, to Y[i]
             */
            vr = 0.0;
            vx = x->ptr.p_double[ix+i];
            for(j=0; j<=i-1; j++)
            {
                v = alpha*a->ptr.pp_double[ia+i][ja+j];
                y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*vx;
                vr = vr+v*x->ptr.p_double[ix+j];
            }
            y->ptr.p_double[iy+i] = y->ptr.p_double[iy+i]+vr;
        }
    }
}
15860 
15861 
rmatrixsyvmv(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_vector * x,ae_int_t ix,ae_vector * tmp,ae_state * _state)15862 double rmatrixsyvmv(ae_int_t n,
15863      /* Real    */ ae_matrix* a,
15864      ae_int_t ia,
15865      ae_int_t ja,
15866      ae_bool isupper,
15867      /* Real    */ ae_vector* x,
15868      ae_int_t ix,
15869      /* Real    */ ae_vector* tmp,
15870      ae_state *_state)
15871 {
15872     ae_int_t i;
15873     double result;
15874 
15875 
15876 
15877     /*
15878      * Quick exit for N=0
15879      */
15880     if( n<=0 )
15881     {
15882         result = (double)(0);
15883         return result;
15884     }
15885 
15886     /*
15887      * Generic code
15888      */
15889     rmatrixsymv(n, 1.0, a, ia, ja, isupper, x, ix, 0.0, tmp, 0, _state);
15890     result = (double)(0);
15891     for(i=0; i<=n-1; i++)
15892     {
15893         result = result+x->ptr.p_double[ix+i]*tmp->ptr.p_double[i];
15894     }
15895     return result;
15896 }
15897 
15898 
15899 /*************************************************************************
15900 This subroutine solves linear system op(A)*x=b where:
15901 * A is NxN upper/lower triangular/unitriangular matrix
15902 * X and B are Nx1 vectors
15903 * "op" may be identity transformation or transposition
15904 
15905 Solution replaces X.
15906 
IMPORTANT: * no overflow/underflow/degeneracy tests are performed.
15908            * no integrity checks for operand sizes, out-of-bounds accesses
15909              and so on is performed
15910 
15911 INPUT PARAMETERS
15912     N   -   matrix size, N>=0
    A       -   matrix, actual matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
15914     IA      -   submatrix offset
15915     JA      -   submatrix offset
15916     IsUpper -   whether matrix is upper triangular
15917     IsUnit  -   whether matrix is unitriangular
15918     OpType  -   transformation type:
15919                 * 0 - no transformation
15920                 * 1 - transposition
15921     X       -   right part, actual vector is stored in X[IX:IX+N-1]
15922     IX      -   offset
15923 
15924 OUTPUT PARAMETERS
15925     X       -   solution replaces elements X[IX:IX+N-1]
15926 
15927   -- ALGLIB routine / remastering of LAPACK's DTRSV --
15928      (c) 2017 Bochkanov Sergey - converted to ALGLIB
15929      (c) 2016 Reference BLAS level1 routine (LAPACK version 3.7.0)
15930      Reference BLAS is a software package provided by Univ. of Tennessee,
15931      Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.
15932 *************************************************************************/
rmatrixtrsv(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_int_t ix,ae_state * _state)15933 void rmatrixtrsv(ae_int_t n,
15934      /* Real    */ ae_matrix* a,
15935      ae_int_t ia,
15936      ae_int_t ja,
15937      ae_bool isupper,
15938      ae_bool isunit,
15939      ae_int_t optype,
15940      /* Real    */ ae_vector* x,
15941      ae_int_t ix,
15942      ae_state *_state)
15943 {
15944     ae_int_t i;
15945     ae_int_t j;
15946     double v;
15947 
15948 
15949 
15950     /*
15951      * Quick exit
15952      */
15953     if( n<=0 )
15954     {
15955         return;
15956     }
15957 
15958     /*
15959      * Try fast kernels
15960      */
15961     if( n>ablas_blas2minvendorkernelsize )
15962     {
15963 
15964         /*
15965          * Try MKL kernel
15966          */
15967         if( rmatrixtrsvmkl(n, a, ia, ja, isupper, isunit, optype, x, ix, _state) )
15968         {
15969             return;
15970         }
15971     }
15972 
15973     /*
15974      * Generic code
15975      */
15976     if( optype==0&&isupper )
15977     {
15978         for(i=n-1; i>=0; i--)
15979         {
15980             v = x->ptr.p_double[ix+i];
15981             for(j=i+1; j<=n-1; j++)
15982             {
15983                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
15984             }
15985             if( !isunit )
15986             {
15987                 v = v/a->ptr.pp_double[ia+i][ja+i];
15988             }
15989             x->ptr.p_double[ix+i] = v;
15990         }
15991         return;
15992     }
15993     if( optype==0&&!isupper )
15994     {
15995         for(i=0; i<=n-1; i++)
15996         {
15997             v = x->ptr.p_double[ix+i];
15998             for(j=0; j<=i-1; j++)
15999             {
16000                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
16001             }
16002             if( !isunit )
16003             {
16004                 v = v/a->ptr.pp_double[ia+i][ja+i];
16005             }
16006             x->ptr.p_double[ix+i] = v;
16007         }
16008         return;
16009     }
16010     if( optype==1&&isupper )
16011     {
16012         for(i=0; i<=n-1; i++)
16013         {
16014             v = x->ptr.p_double[ix+i];
16015             if( !isunit )
16016             {
16017                 v = v/a->ptr.pp_double[ia+i][ja+i];
16018             }
16019             x->ptr.p_double[ix+i] = v;
16020             if( v==0 )
16021             {
16022                 continue;
16023             }
16024             for(j=i+1; j<=n-1; j++)
16025             {
16026                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
16027             }
16028         }
16029         return;
16030     }
16031     if( optype==1&&!isupper )
16032     {
16033         for(i=n-1; i>=0; i--)
16034         {
16035             v = x->ptr.p_double[ix+i];
16036             if( !isunit )
16037             {
16038                 v = v/a->ptr.pp_double[ia+i][ja+i];
16039             }
16040             x->ptr.p_double[ix+i] = v;
16041             if( v==0 )
16042             {
16043                 continue;
16044             }
16045             for(j=0; j<=i-1; j++)
16046             {
16047                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
16048             }
16049         }
16050         return;
16051     }
16052     ae_assert(ae_false, "RMatrixTRSV: unexpected operation type", _state);
16053 }
16054 
16055 
16056 /*************************************************************************
16057 This subroutine calculates X*op(A^-1) where:
16058 * X is MxN general matrix
16059 * A is NxN upper/lower triangular/unitriangular matrix
16060 * "op" may be identity transformation, transposition, conjugate transposition
16061 Multiplication result replaces X.
16062 
16063 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16076     I2  -   submatrix offset
16077     J2  -   submatrix offset
16078 
16079   ! FREE EDITION OF ALGLIB:
16080   !
16081   ! Free Edition of ALGLIB supports following important features for  this
16082   ! function:
16083   ! * C++ version: x64 SIMD support using C++ intrinsics
16084   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16085   !
16086   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16087   ! Reference Manual in order  to  find  out  how to activate SIMD support
16088   ! in ALGLIB.
16089 
16090   ! COMMERCIAL EDITION OF ALGLIB:
16091   !
16092   ! Commercial Edition of ALGLIB includes following important improvements
16093   ! of this function:
16094   ! * high-performance native backend with same C# interface (C# version)
16095   ! * multithreading support (C++ and C# versions)
16096   ! * hardware vendor (Intel) implementations of linear algebra primitives
16097   !   (C++ and C# versions, x86/x64 platform)
16098   !
16099   ! We recommend you to read 'Working with commercial version' section  of
16100   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16101   ! related features provided by commercial edition of ALGLIB.
16102 
16103   -- ALGLIB routine --
16104      20.01.2018
16105      Bochkanov Sergey
16106 *************************************************************************/
cmatrixrighttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16107 void cmatrixrighttrsm(ae_int_t m,
16108      ae_int_t n,
16109      /* Complex */ ae_matrix* a,
16110      ae_int_t i1,
16111      ae_int_t j1,
16112      ae_bool isupper,
16113      ae_bool isunit,
16114      ae_int_t optype,
16115      /* Complex */ ae_matrix* x,
16116      ae_int_t i2,
16117      ae_int_t j2,
16118      ae_state *_state)
16119 {
16120     ae_int_t s1;
16121     ae_int_t s2;
16122     ae_int_t tsa;
16123     ae_int_t tsb;
16124     ae_int_t tscur;
16125 
16126 
16127     tsa = matrixtilesizea(_state)/2;
16128     tsb = matrixtilesizeb(_state);
16129     tscur = tsb;
16130     if( imax2(m, n, _state)<=tsb )
16131     {
16132         tscur = tsa;
16133     }
16134     ae_assert(tscur>=1, "CMatrixRightTRSM: integrity check failed", _state);
16135 
16136     /*
16137      * Upper level parallelization:
16138      * * decide whether it is feasible to activate multithreading
16139      * * perform optionally parallelized splits on M
16140      */
16141     if( m>=2*tsb&&ae_fp_greater_eq(4*rmul3((double)(m), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
16142     {
16143         if( _trypexec_cmatrixrighttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
16144         {
16145             return;
16146         }
16147     }
16148     if( m>=2*tsb )
16149     {
16150 
16151         /*
16152          * Split X: X*A = (X1 X2)^T*A
16153          */
16154         tiledsplit(m, tsb, &s1, &s2, _state);
16155         cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16156         cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16157         return;
16158     }
16159 
16160     /*
16161      * Basecase: either MKL-supported code or ALGLIB basecase code
16162      */
16163     if( imax2(m, n, _state)<=tsb )
16164     {
16165         if( cmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
16166         {
16167             return;
16168         }
16169     }
16170     if( imax2(m, n, _state)<=tsa )
16171     {
16172         ablas_cmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16173         return;
16174     }
16175 
16176     /*
16177      * Recursive subdivision
16178      */
16179     if( m>=n )
16180     {
16181 
16182         /*
16183          * Split X: X*A = (X1 X2)^T*A
16184          */
16185         tiledsplit(m, tscur, &s1, &s2, _state);
16186         cmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16187         cmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16188     }
16189     else
16190     {
16191 
16192         /*
16193          * Split A:
16194          *               (A1  A12)
16195          * X*op(A) = X*op(       )
16196          *               (     A2)
16197          *
16198          * Different variants depending on
16199          * IsUpper/OpType combinations
16200          */
16201         tiledsplit(n, tscur, &s1, &s2, _state);
16202         if( isupper&&optype==0 )
16203         {
16204 
16205             /*
16206              *                  (A1  A12)-1
16207              * X*A^-1 = (X1 X2)*(       )
16208              *                  (     A2)
16209              */
16210             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16211             cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1, j1+s1, 0, ae_complex_from_d(1.0), x, i2, j2+s1, _state);
16212             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16213         }
16214         if( isupper&&optype!=0 )
16215         {
16216 
16217             /*
16218              *                  (A1'     )-1
16219              * X*A^-1 = (X1 X2)*(        )
16220              *                  (A12' A2')
16221              */
16222             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16223             cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1, j1+s1, optype, ae_complex_from_d(1.0), x, i2, j2, _state);
16224             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16225         }
16226         if( !isupper&&optype==0 )
16227         {
16228 
16229             /*
16230              *                  (A1     )-1
16231              * X*A^-1 = (X1 X2)*(       )
16232              *                  (A21  A2)
16233              */
16234             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16235             cmatrixgemm(m, s1, s2, ae_complex_from_d(-1.0), x, i2, j2+s1, 0, a, i1+s1, j1, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
16236             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16237         }
16238         if( !isupper&&optype!=0 )
16239         {
16240 
16241             /*
16242              *                  (A1' A21')-1
16243              * X*A^-1 = (X1 X2)*(        )
16244              *                  (     A2')
16245              */
16246             cmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16247             cmatrixgemm(m, s2, s1, ae_complex_from_d(-1.0), x, i2, j2, 0, a, i1+s1, j1, optype, ae_complex_from_d(1.0), x, i2, j2+s1, _state);
16248             cmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16249         }
16250     }
16251 }
16252 
16253 
16254 /*************************************************************************
16255 Serial stub for GPL edition.
16256 *************************************************************************/
_trypexec_cmatrixrighttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16257 ae_bool _trypexec_cmatrixrighttrsm(ae_int_t m,
16258     ae_int_t n,
16259     /* Complex */ ae_matrix* a,
16260     ae_int_t i1,
16261     ae_int_t j1,
16262     ae_bool isupper,
16263     ae_bool isunit,
16264     ae_int_t optype,
16265     /* Complex */ ae_matrix* x,
16266     ae_int_t i2,
16267     ae_int_t j2,
16268     ae_state *_state)
16269 {
16270     return ae_false;
16271 }
16272 
16273 
16274 /*************************************************************************
16275 This subroutine calculates op(A^-1)*X where:
16276 * X is MxN general matrix
16277 * A is MxM upper/lower triangular/unitriangular matrix
16278 * "op" may be identity transformation, transposition, conjugate transposition
16279 Multiplication result replaces X.
16280 
16281 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
                * 2 - conjugate transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16294     I2  -   submatrix offset
16295     J2  -   submatrix offset
16296 
16297   ! FREE EDITION OF ALGLIB:
16298   !
16299   ! Free Edition of ALGLIB supports following important features for  this
16300   ! function:
16301   ! * C++ version: x64 SIMD support using C++ intrinsics
16302   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16303   !
16304   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16305   ! Reference Manual in order  to  find  out  how to activate SIMD support
16306   ! in ALGLIB.
16307 
16308   ! COMMERCIAL EDITION OF ALGLIB:
16309   !
16310   ! Commercial Edition of ALGLIB includes following important improvements
16311   ! of this function:
16312   ! * high-performance native backend with same C# interface (C# version)
16313   ! * multithreading support (C++ and C# versions)
16314   ! * hardware vendor (Intel) implementations of linear algebra primitives
16315   !   (C++ and C# versions, x86/x64 platform)
16316   !
16317   ! We recommend you to read 'Working with commercial version' section  of
16318   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16319   ! related features provided by commercial edition of ALGLIB.
16320 
16321   -- ALGLIB routine --
16322      15.12.2009-22.01.2018
16323      Bochkanov Sergey
16324 *************************************************************************/
cmatrixlefttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16325 void cmatrixlefttrsm(ae_int_t m,
16326      ae_int_t n,
16327      /* Complex */ ae_matrix* a,
16328      ae_int_t i1,
16329      ae_int_t j1,
16330      ae_bool isupper,
16331      ae_bool isunit,
16332      ae_int_t optype,
16333      /* Complex */ ae_matrix* x,
16334      ae_int_t i2,
16335      ae_int_t j2,
16336      ae_state *_state)
16337 {
16338     ae_int_t s1;
16339     ae_int_t s2;
16340     ae_int_t tsa;
16341     ae_int_t tsb;
16342     ae_int_t tscur;
16343 
16344 
16345     tsa = matrixtilesizea(_state)/2;
16346     tsb = matrixtilesizeb(_state);
16347     tscur = tsb;
16348     if( imax2(m, n, _state)<=tsb )
16349     {
16350         tscur = tsa;
16351     }
16352     ae_assert(tscur>=1, "CMatrixLeftTRSM: integrity check failed", _state);
16353 
16354     /*
16355      * Upper level parallelization:
16356      * * decide whether it is feasible to activate multithreading
16357      * * perform optionally parallelized splits on N
16358      */
16359     if( n>=2*tsb&&ae_fp_greater_eq(4*rmul3((double)(n), (double)(m), (double)(m), _state),smpactivationlevel(_state)) )
16360     {
16361         if( _trypexec_cmatrixlefttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
16362         {
16363             return;
16364         }
16365     }
16366     if( n>=2*tsb )
16367     {
16368         tiledsplit(n, tscur, &s1, &s2, _state);
16369         cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
16370         cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16371         return;
16372     }
16373 
16374     /*
16375      * Basecase: either MKL-supported code or ALGLIB basecase code
16376      */
16377     if( imax2(m, n, _state)<=tsb )
16378     {
16379         if( cmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
16380         {
16381             return;
16382         }
16383     }
16384     if( imax2(m, n, _state)<=tsa )
16385     {
16386         ablas_cmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16387         return;
16388     }
16389 
16390     /*
16391      * Recursive subdivision
16392      */
16393     if( n>=m )
16394     {
16395 
16396         /*
16397          * Split X: op(A)^-1*X = op(A)^-1*(X1 X2)
16398          */
16399         tiledsplit(n, tscur, &s1, &s2, _state);
16400         cmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16401         cmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
16402     }
16403     else
16404     {
16405 
16406         /*
16407          * Split A
16408          */
16409         tiledsplit(m, tscur, &s1, &s2, _state);
16410         if( isupper&&optype==0 )
16411         {
16412 
16413             /*
16414              *           (A1  A12)-1  ( X1 )
16415              * A^-1*X* = (       )   *(    )
16416              *           (     A2)    ( X2 )
16417              */
16418             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16419             cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1, j1+s1, 0, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
16420             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16421         }
16422         if( isupper&&optype!=0 )
16423         {
16424 
16425             /*
16426              *          (A1'     )-1 ( X1 )
16427              * A^-1*X = (        )  *(    )
16428              *          (A12' A2')   ( X2 )
16429              */
16430             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16431             cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1, j1+s1, optype, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state);
16432             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16433         }
16434         if( !isupper&&optype==0 )
16435         {
16436 
16437             /*
16438              *          (A1     )-1 ( X1 )
16439              * A^-1*X = (       )  *(    )
16440              *          (A21  A2)   ( X2 )
16441              */
16442             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16443             cmatrixgemm(s2, n, s1, ae_complex_from_d(-1.0), a, i1+s1, j1, 0, x, i2, j2, 0, ae_complex_from_d(1.0), x, i2+s1, j2, _state);
16444             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16445         }
16446         if( !isupper&&optype!=0 )
16447         {
16448 
16449             /*
16450              *          (A1' A21')-1 ( X1 )
16451              * A^-1*X = (        )  *(    )
16452              *          (     A2')   ( X2 )
16453              */
16454             cmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
16455             cmatrixgemm(s1, n, s2, ae_complex_from_d(-1.0), a, i1+s1, j1, optype, x, i2+s1, j2, 0, ae_complex_from_d(1.0), x, i2, j2, _state);
16456             cmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16457         }
16458     }
16459 }
16460 
16461 
16462 /*************************************************************************
16463 Serial stub for GPL edition.
16464 *************************************************************************/
_trypexec_cmatrixlefttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16465 ae_bool _trypexec_cmatrixlefttrsm(ae_int_t m,
16466     ae_int_t n,
16467     /* Complex */ ae_matrix* a,
16468     ae_int_t i1,
16469     ae_int_t j1,
16470     ae_bool isupper,
16471     ae_bool isunit,
16472     ae_int_t optype,
16473     /* Complex */ ae_matrix* x,
16474     ae_int_t i2,
16475     ae_int_t j2,
16476     ae_state *_state)
16477 {
16478     return ae_false;
16479 }
16480 
16481 
16482 /*************************************************************************
16483 This subroutine calculates X*op(A^-1) where:
16484 * X is MxN general matrix
16485 * A is NxN upper/lower triangular/unitriangular matrix
16486 * "op" may be identity transformation, transposition
16487 Multiplication result replaces X.
16488 
16489 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+N-1,J1:J1+N-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16501     I2  -   submatrix offset
16502     J2  -   submatrix offset
16503 
16504   ! FREE EDITION OF ALGLIB:
16505   !
16506   ! Free Edition of ALGLIB supports following important features for  this
16507   ! function:
16508   ! * C++ version: x64 SIMD support using C++ intrinsics
16509   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16510   !
16511   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16512   ! Reference Manual in order  to  find  out  how to activate SIMD support
16513   ! in ALGLIB.
16514 
16515   ! COMMERCIAL EDITION OF ALGLIB:
16516   !
16517   ! Commercial Edition of ALGLIB includes following important improvements
16518   ! of this function:
16519   ! * high-performance native backend with same C# interface (C# version)
16520   ! * multithreading support (C++ and C# versions)
16521   ! * hardware vendor (Intel) implementations of linear algebra primitives
16522   !   (C++ and C# versions, x86/x64 platform)
16523   !
16524   ! We recommend you to read 'Working with commercial version' section  of
16525   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16526   ! related features provided by commercial edition of ALGLIB.
16527 
16528   -- ALGLIB routine --
16529      15.12.2009-22.01.2018
16530      Bochkanov Sergey
16531 *************************************************************************/
rmatrixrighttrsm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)16532 void rmatrixrighttrsm(ae_int_t m,
16533      ae_int_t n,
16534      /* Real    */ ae_matrix* a,
16535      ae_int_t i1,
16536      ae_int_t j1,
16537      ae_bool isupper,
16538      ae_bool isunit,
16539      ae_int_t optype,
16540      /* Real    */ ae_matrix* x,
16541      ae_int_t i2,
16542      ae_int_t j2,
16543      ae_state *_state)
16544 {
16545     ae_int_t s1;
16546     ae_int_t s2;
16547     ae_int_t tsa;
16548     ae_int_t tsb;
16549     ae_int_t tscur;
16550 
16551 
16552     tsa = matrixtilesizea(_state);
16553     tsb = matrixtilesizeb(_state);
16554     tscur = tsb;
16555     if( imax2(m, n, _state)<=tsb )
16556     {
16557         tscur = tsa;
16558     }
16559     ae_assert(tscur>=1, "RMatrixRightTRSM: integrity check failed", _state);
16560 
16561     /*
16562      * Upper level parallelization:
16563      * * decide whether it is feasible to activate multithreading
16564      * * perform optionally parallelized splits on M
16565      */
16566     if( m>=2*tsb&&ae_fp_greater_eq(rmul3((double)(m), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
16567     {
16568         if( _trypexec_rmatrixrighttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
16569         {
16570             return;
16571         }
16572     }
16573     if( m>=2*tsb )
16574     {
16575 
16576         /*
16577          * Split X: X*A = (X1 X2)^T*A
16578          */
16579         tiledsplit(m, tsb, &s1, &s2, _state);
16580         rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16581         rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16582         return;
16583     }
16584 
16585     /*
16586      * Basecase: MKL or ALGLIB code
16587      */
16588     if( imax2(m, n, _state)<=tsb )
16589     {
16590         if( rmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
16591         {
16592             return;
16593         }
16594     }
16595     if( imax2(m, n, _state)<=tsa )
16596     {
16597         ablas_rmatrixrighttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16598         return;
16599     }
16600 
16601     /*
16602      * Recursive subdivision
16603      */
16604     if( m>=n )
16605     {
16606 
16607         /*
16608          * Split X: X*A = (X1 X2)^T*A
16609          */
16610         tiledsplit(m, tscur, &s1, &s2, _state);
16611         rmatrixrighttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16612         rmatrixrighttrsm(s2, n, a, i1, j1, isupper, isunit, optype, x, i2+s1, j2, _state);
16613     }
16614     else
16615     {
16616 
16617         /*
16618          * Split A:
16619          *               (A1  A12)
16620          * X*op(A) = X*op(       )
16621          *               (     A2)
16622          *
16623          * Different variants depending on
16624          * IsUpper/OpType combinations
16625          */
16626         tiledsplit(n, tscur, &s1, &s2, _state);
16627         if( isupper&&optype==0 )
16628         {
16629 
16630             /*
16631              *                  (A1  A12)-1
16632              * X*A^-1 = (X1 X2)*(       )
16633              *                  (     A2)
16634              */
16635             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16636             rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1, j1+s1, 0, 1.0, x, i2, j2+s1, _state);
16637             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16638         }
16639         if( isupper&&optype!=0 )
16640         {
16641 
16642             /*
16643              *                  (A1'     )-1
16644              * X*A^-1 = (X1 X2)*(        )
16645              *                  (A12' A2')
16646              */
16647             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16648             rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1, j1+s1, optype, 1.0, x, i2, j2, _state);
16649             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16650         }
16651         if( !isupper&&optype==0 )
16652         {
16653 
16654             /*
16655              *                  (A1     )-1
16656              * X*A^-1 = (X1 X2)*(       )
16657              *                  (A21  A2)
16658              */
16659             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16660             rmatrixgemm(m, s1, s2, -1.0, x, i2, j2+s1, 0, a, i1+s1, j1, 0, 1.0, x, i2, j2, _state);
16661             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16662         }
16663         if( !isupper&&optype!=0 )
16664         {
16665 
16666             /*
16667              *                  (A1' A21')-1
16668              * X*A^-1 = (X1 X2)*(        )
16669              *                  (     A2')
16670              */
16671             rmatrixrighttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
16672             rmatrixgemm(m, s2, s1, -1.0, x, i2, j2, 0, a, i1+s1, j1, optype, 1.0, x, i2, j2+s1, _state);
16673             rmatrixrighttrsm(m, s2, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2, j2+s1, _state);
16674         }
16675     }
16676 }
16677 
16678 
16679 /*************************************************************************
16680 Serial stub for GPL edition.
16681 *************************************************************************/
ae_bool _trypexec_rmatrixrighttrsm(ae_int_t m,
    ae_int_t n,
    /* Real    */ ae_matrix* a,
    ae_int_t i1,
    ae_int_t j1,
    ae_bool isupper,
    ae_bool isunit,
    ae_int_t optype,
    /* Real    */ ae_matrix* x,
    ae_int_t i2,
    ae_int_t j2,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
16697 
16698 
16699 /*************************************************************************
16700 This subroutine calculates op(A^-1)*X where:
16701 * X is MxN general matrix
16702 * A is MxM upper/lower triangular/unitriangular matrix
16703 * "op" may be identity transformation, transposition
16704 Multiplication result replaces X.
16705 
16706 INPUT PARAMETERS
    N   -   matrix size, N>=0
    M   -   matrix size, M>=0
    A       -   matrix, actual matrix is stored in A[I1:I1+M-1,J1:J1+M-1]
    I1      -   submatrix offset
    J1      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X   -   matrix, actual matrix is stored in X[I2:I2+M-1,J2:J2+N-1]
16718     I2  -   submatrix offset
16719     J2  -   submatrix offset
16720 
16721   ! FREE EDITION OF ALGLIB:
16722   !
16723   ! Free Edition of ALGLIB supports following important features for  this
16724   ! function:
16725   ! * C++ version: x64 SIMD support using C++ intrinsics
16726   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16727   !
16728   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16729   ! Reference Manual in order  to  find  out  how to activate SIMD support
16730   ! in ALGLIB.
16731 
16732   ! COMMERCIAL EDITION OF ALGLIB:
16733   !
16734   ! Commercial Edition of ALGLIB includes following important improvements
16735   ! of this function:
16736   ! * high-performance native backend with same C# interface (C# version)
16737   ! * multithreading support (C++ and C# versions)
16738   ! * hardware vendor (Intel) implementations of linear algebra primitives
16739   !   (C++ and C# versions, x86/x64 platform)
16740   !
16741   ! We recommend you to read 'Working with commercial version' section  of
16742   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16743   ! related features provided by commercial edition of ALGLIB.
16744 
16745   -- ALGLIB routine --
16746      15.12.2009-22.01.2018
16747      Bochkanov Sergey
16748 *************************************************************************/
void rmatrixlefttrsm(ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Real    */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * Select current tile size: once the whole problem fits within the
     * larger tile TSB, further splits use the smaller tile TSA;
     * otherwise splitting continues with TSB-sized tiles.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax2(m, n, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "RMatrixLeftTRSMRec: integrity check failed", _state);

    /*
     * Upper level parallelization:
     * * decide whether it is feasible to activate multithreading
     * * perform optionally parallelized splits on N
     */
    if( n>=2*tsb&&ae_fp_greater_eq(rmul3((double)(n), (double)(m), (double)(m), _state),smpactivationlevel(_state)) )
    {
        if( _trypexec_rmatrixlefttrsm(m,n,a,i1,j1,isupper,isunit,optype,x,i2,j2, _state) )
        {
            return;
        }
    }
    if( n>=2*tsb )
    {
        /*
         * Columns of X are independent problems: solve each half of X
         * against the same triangular matrix A.
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
        rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        return;
    }

    /*
     * Basecase: MKL or ALGLIB code
     */
    if( imax2(m, n, _state)<=tsb )
    {
        if( rmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
        {
            return;
        }
    }
    if( imax2(m, n, _state)<=tsa )
    {
        ablas_rmatrixlefttrsm2(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        return;
    }

    /*
     * Recursive subdivision
     */
    if( n>=m )
    {

        /*
         * Split X: op(A)^-1*X = op(A)^-1*(X1 X2)
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        rmatrixlefttrsm(m, s1, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        rmatrixlefttrsm(m, s2, a, i1, j1, isupper, isunit, optype, x, i2, j2+s1, _state);
    }
    else
    {

        /*
         * Split A.
         * In each branch below the statement order (solve one block,
         * apply the GEMM update, solve the other block) implements
         * block forward/back substitution and must not be changed.
         */
        tiledsplit(m, tscur, &s1, &s2, _state);
        if( isupper&&optype==0 )
        {

            /*
             *           (A1  A12)-1  ( X1 )
             * A^-1*X =  (       )   *(    )
             *           (     A2)    ( X2 )
             */
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
            rmatrixgemm(s1, n, s2, -1.0, a, i1, j1+s1, 0, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state);
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        }
        if( isupper&&optype!=0 )
        {

            /*
             *          (A1'     )-1 ( X1 )
             * A^-1*X = (        )  *(    )
             *          (A12' A2')   ( X2 )
             */
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
            rmatrixgemm(s2, n, s1, -1.0, a, i1, j1+s1, optype, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state);
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
        }
        if( !isupper&&optype==0 )
        {

            /*
             *          (A1     )-1 ( X1 )
             * A^-1*X = (       )  *(    )
             *          (A21  A2)   ( X2 )
             */
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
            rmatrixgemm(s2, n, s1, -1.0, a, i1+s1, j1, 0, x, i2, j2, 0, 1.0, x, i2+s1, j2, _state);
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
        }
        if( !isupper&&optype!=0 )
        {

            /*
             *          (A1' A21')-1 ( X1 )
             * A^-1*X = (        )  *(    )
             *          (     A2')   ( X2 )
             */
            rmatrixlefttrsm(s2, n, a, i1+s1, j1+s1, isupper, isunit, optype, x, i2+s1, j2, _state);
            rmatrixgemm(s1, n, s2, -1.0, a, i1+s1, j1, optype, x, i2+s1, j2, 0, 1.0, x, i2, j2, _state);
            rmatrixlefttrsm(s1, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state);
        }
    }
}
16884 
16885 
16886 /*************************************************************************
16887 Serial stub for GPL edition.
16888 *************************************************************************/
ae_bool _trypexec_rmatrixlefttrsm(ae_int_t m,
    ae_int_t n,
    /* Real    */ ae_matrix* a,
    ae_int_t i1,
    ae_int_t j1,
    ae_bool isupper,
    ae_bool isunit,
    ae_int_t optype,
    /* Real    */ ae_matrix* x,
    ae_int_t i2,
    ae_int_t j2,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
16904 
16905 
16906 /*************************************************************************
16907 This subroutine calculates  C=alpha*A*A^H+beta*C  or  C=alpha*A^H*A+beta*C
16908 where:
16909 * C is NxN Hermitian matrix given by its upper/lower triangle
16910 * A is NxK matrix when A*A^H is calculated, KxN matrix otherwise
16911 
16912 Additional info:
16913 * multiplication result replaces C. If Beta=0, C elements are not used in
16914   calculations (not multiplied by zero - just not referenced)
16915 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
16916 * if both Beta and Alpha are zero, C is filled by zeros.
16917 
16918 INPUT PARAMETERS
16919     N       -   matrix size, N>=0
16920     K       -   matrix size, K>=0
16921     Alpha   -   coefficient
16922     A       -   matrix
16923     IA      -   submatrix offset (row index)
16924     JA      -   submatrix offset (column index)
16925     OpTypeA -   multiplication type:
16926                 * 0 - A*A^H is calculated
16927                 * 2 - A^H*A is calculated
16928     Beta    -   coefficient
16929     C       -   preallocated input/output matrix
16930     IC      -   submatrix offset (row index)
16931     JC      -   submatrix offset (column index)
16932     IsUpper -   whether upper or lower triangle of C is updated;
16933                 this function updates only one half of C, leaving
16934                 other half unchanged (not referenced at all).
16935 
16936   ! FREE EDITION OF ALGLIB:
16937   !
16938   ! Free Edition of ALGLIB supports following important features for  this
16939   ! function:
16940   ! * C++ version: x64 SIMD support using C++ intrinsics
16941   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
16942   !
16943   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
16944   ! Reference Manual in order  to  find  out  how to activate SIMD support
16945   ! in ALGLIB.
16946 
16947   ! COMMERCIAL EDITION OF ALGLIB:
16948   !
16949   ! Commercial Edition of ALGLIB includes following important improvements
16950   ! of this function:
16951   ! * high-performance native backend with same C# interface (C# version)
16952   ! * multithreading support (C++ and C# versions)
16953   ! * hardware vendor (Intel) implementations of linear algebra primitives
16954   !   (C++ and C# versions, x86/x64 platform)
16955   !
16956   ! We recommend you to read 'Working with commercial version' section  of
16957   ! ALGLIB Reference Manual in order to find out how to  use  performance-
16958   ! related features provided by commercial edition of ALGLIB.
16959 
16960   -- ALGLIB routine --
16961      16.12.2009-22.01.2018
16962      Bochkanov Sergey
16963 *************************************************************************/
void cmatrixherk(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Complex */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Complex */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * Select current tile size; tile A size is halved here
     * (presumably because complex entries occupy twice the storage
     * of reals - TODO confirm against matrixtilesizea() docs).
     */
    tsa = matrixtilesizea(_state)/2;
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax2(n, k, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "CMatrixHERK: integrity check failed", _state);

    /*
     * Decide whether it is feasible to activate multithreading
     */
    if( n>=2*tsb&&ae_fp_greater_eq(8*rmul3((double)(k), (double)(n), (double)(n), _state)/2,smpactivationlevel(_state)) )
    {
        if( _trypexec_cmatrixherk(n,k,alpha,a,ia,ja,optypea,beta,c,ic,jc,isupper, _state) )
        {
            return;
        }
    }

    /*
     * Use MKL or ALGLIB basecase code
     */
    if( imax2(n, k, _state)<=tsb )
    {
        if( cmatrixherkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
        {
            return;
        }
    }
    if( imax2(n, k, _state)<=tsa )
    {
        ablas_cmatrixherk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
        return;
    }

    /*
     * Recursive division of the problem
     */
    if( k>=n )
    {

        /*
         * Split K: the first half applies Beta to C, the second half
         * accumulates on top of it with coefficient 1.0.
         */
        tiledsplit(k, tscur, &s1, &s2, _state);
        if( optypea==0 )
        {
            cmatrixherk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state);
        }
        else
        {
            cmatrixherk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state);
        }
    }
    else
    {

        /*
         * Split N: two diagonal blocks are handled by recursive HERK
         * calls, the off-diagonal block of the stored triangle by GEMM
         * (op=2 denotes conjugate transposition).
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        if( optypea==0&&isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 0, a, ia+s1, ja, 2, ae_complex_from_d(beta), c, ic, jc+s1, _state);
        }
        if( optypea==0&&!isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia+s1, ja, 0, a, ia, ja, 2, ae_complex_from_d(beta), c, ic+s1, jc, _state);
        }
        if( optypea!=0&&isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s1, s2, k, ae_complex_from_d(alpha), a, ia, ja, 2, a, ia, ja+s1, 0, ae_complex_from_d(beta), c, ic, jc+s1, _state);
        }
        if( optypea!=0&&!isupper )
        {
            cmatrixherk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            cmatrixherk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            cmatrixgemm(s2, s1, k, ae_complex_from_d(alpha), a, ia, ja+s1, 2, a, ia, ja, 0, ae_complex_from_d(beta), c, ic+s1, jc, _state);
        }
    }
}
17075 
17076 
17077 /*************************************************************************
17078 Serial stub for GPL edition.
17079 *************************************************************************/
ae_bool _trypexec_cmatrixherk(ae_int_t n,
    ae_int_t k,
    double alpha,
    /* Complex */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    double beta,
    /* Complex */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_bool isupper,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
17096 
17097 
17098 /*************************************************************************
17099 This subroutine calculates  C=alpha*A*A^T+beta*C  or  C=alpha*A^T*A+beta*C
17100 where:
17101 * C is NxN symmetric matrix given by its upper/lower triangle
17102 * A is NxK matrix when A*A^T is calculated, KxN matrix otherwise
17103 
17104 Additional info:
17105 * multiplication result replaces C. If Beta=0, C elements are not used in
17106   calculations (not multiplied by zero - just not referenced)
17107 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
17108 * if both Beta and Alpha are zero, C is filled by zeros.
17109 
17110 INPUT PARAMETERS
17111     N       -   matrix size, N>=0
17112     K       -   matrix size, K>=0
17113     Alpha   -   coefficient
17114     A       -   matrix
17115     IA      -   submatrix offset (row index)
17116     JA      -   submatrix offset (column index)
17117     OpTypeA -   multiplication type:
17118                 * 0 - A*A^T is calculated
17119                 * 2 - A^T*A is calculated
17120     Beta    -   coefficient
17121     C       -   preallocated input/output matrix
17122     IC      -   submatrix offset (row index)
17123     JC      -   submatrix offset (column index)
17124     IsUpper -   whether C is upper triangular or lower triangular
17125 
17126   ! FREE EDITION OF ALGLIB:
17127   !
17128   ! Free Edition of ALGLIB supports following important features for  this
17129   ! function:
17130   ! * C++ version: x64 SIMD support using C++ intrinsics
17131   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
17132   !
17133   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
17134   ! Reference Manual in order  to  find  out  how to activate SIMD support
17135   ! in ALGLIB.
17136 
17137   ! COMMERCIAL EDITION OF ALGLIB:
17138   !
17139   ! Commercial Edition of ALGLIB includes following important improvements
17140   ! of this function:
17141   ! * high-performance native backend with same C# interface (C# version)
17142   ! * multithreading support (C++ and C# versions)
17143   ! * hardware vendor (Intel) implementations of linear algebra primitives
17144   !   (C++ and C# versions, x86/x64 platform)
17145   !
17146   ! We recommend you to read 'Working with commercial version' section  of
17147   ! ALGLIB Reference Manual in order to find out how to  use  performance-
17148   ! related features provided by commercial edition of ALGLIB.
17149 
17150   -- ALGLIB routine --
17151      16.12.2009-22.01.2018
17152      Bochkanov Sergey
17153 *************************************************************************/
void rmatrixsyrk(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Real    */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * Select current tile size: once the whole problem fits within the
     * larger tile TSB, further splits use the smaller tile TSA.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax2(n, k, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "RMatrixSYRK: integrity check failed", _state);

    /*
     * Decide whether it is feasible to activate multithreading
     */
    if( n>=2*tsb&&ae_fp_greater_eq(2*rmul3((double)(k), (double)(n), (double)(n), _state)/2,smpactivationlevel(_state)) )
    {
        if( _trypexec_rmatrixsyrk(n,k,alpha,a,ia,ja,optypea,beta,c,ic,jc,isupper, _state) )
        {
            return;
        }
    }

    /*
     * Use MKL or generic basecase code
     */
    if( imax2(n, k, _state)<=tsb )
    {
        if( rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
        {
            return;
        }
    }
    if( imax2(n, k, _state)<=tsa )
    {
        ablas_rmatrixsyrk2(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
        return;
    }

    /*
     * Recursive subdivision of the problem
     */
    if( k>=n )
    {

        /*
         * Split K: the first half applies Beta to C, the second half
         * accumulates on top of it with coefficient 1.0.
         */
        tiledsplit(k, tscur, &s1, &s2, _state);
        if( optypea==0 )
        {
            rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(n, s2, alpha, a, ia, ja+s1, optypea, 1.0, c, ic, jc, isupper, _state);
        }
        else
        {
            rmatrixsyrk(n, s1, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(n, s2, alpha, a, ia+s1, ja, optypea, 1.0, c, ic, jc, isupper, _state);
        }
    }
    else
    {

        /*
         * Split N: two diagonal blocks are handled by recursive SYRK
         * calls, the off-diagonal block of the stored triangle by GEMM
         * (op=1 denotes transposition).
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        if( optypea==0&&isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 0, a, ia+s1, ja, 1, beta, c, ic, jc+s1, _state);
        }
        if( optypea==0&&!isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia+s1, ja, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s2, s1, k, alpha, a, ia+s1, ja, 0, a, ia, ja, 1, beta, c, ic+s1, jc, _state);
        }
        if( optypea!=0&&isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s1, s2, k, alpha, a, ia, ja, 1, a, ia, ja+s1, 0, beta, c, ic, jc+s1, _state);
        }
        if( optypea!=0&&!isupper )
        {
            rmatrixsyrk(s1, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
            rmatrixsyrk(s2, k, alpha, a, ia, ja+s1, optypea, beta, c, ic+s1, jc+s1, isupper, _state);
            rmatrixgemm(s2, s1, k, alpha, a, ia, ja+s1, 1, a, ia, ja, 0, beta, c, ic+s1, jc, _state);
        }
    }
}
17265 
17266 
17267 /*************************************************************************
17268 Serial stub for GPL edition.
17269 *************************************************************************/
ae_bool _trypexec_rmatrixsyrk(ae_int_t n,
    ae_int_t k,
    double alpha,
    /* Real    */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    double beta,
    /* Real    */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_bool isupper,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
17286 
17287 
17288 /*************************************************************************
17289 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
17290 * C is MxN general matrix
17291 * op1(A) is MxK matrix
17292 * op2(B) is KxN matrix
17293 * "op" may be identity transformation, transposition, conjugate transposition
17294 
17295 Additional info:
17296 * cache-oblivious algorithm is used.
17297 * multiplication result replaces C. If Beta=0, C elements are not used in
17298   calculations (not multiplied by zero - just not referenced)
17299 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
17300 * if both Beta and Alpha are zero, C is filled by zeros.
17301 
17302 IMPORTANT:
17303 
17304 This function does NOT preallocate output matrix C, it MUST be preallocated
17305 by caller prior to calling this function. In case C does not have  enough
17306 space to store result, exception will be generated.
17307 
17308 INPUT PARAMETERS
17309     M       -   matrix size, M>0
17310     N       -   matrix size, N>0
17311     K       -   matrix size, K>0
17312     Alpha   -   coefficient
17313     A       -   matrix
17314     IA      -   submatrix offset
17315     JA      -   submatrix offset
17316     OpTypeA -   transformation type:
17317                 * 0 - no transformation
17318                 * 1 - transposition
17319                 * 2 - conjugate transposition
17320     B       -   matrix
17321     IB      -   submatrix offset
17322     JB      -   submatrix offset
17323     OpTypeB -   transformation type:
17324                 * 0 - no transformation
17325                 * 1 - transposition
17326                 * 2 - conjugate transposition
17327     Beta    -   coefficient
17328     C       -   matrix (PREALLOCATED, large enough to store result)
17329     IC      -   submatrix offset
17330     JC      -   submatrix offset
17331 
17332   ! FREE EDITION OF ALGLIB:
17333   !
17334   ! Free Edition of ALGLIB supports following important features for  this
17335   ! function:
17336   ! * C++ version: x64 SIMD support using C++ intrinsics
17337   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
17338   !
17339   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
17340   ! Reference Manual in order  to  find  out  how to activate SIMD support
17341   ! in ALGLIB.
17342 
17343   ! COMMERCIAL EDITION OF ALGLIB:
17344   !
17345   ! Commercial Edition of ALGLIB includes following important improvements
17346   ! of this function:
17347   ! * high-performance native backend with same C# interface (C# version)
17348   ! * multithreading support (C++ and C# versions)
17349   ! * hardware vendor (Intel) implementations of linear algebra primitives
17350   !   (C++ and C# versions, x86/x64 platform)
17351   !
17352   ! We recommend you to read 'Working with commercial version' section  of
17353   ! ALGLIB Reference Manual in order to find out how to  use  performance-
17354   ! related features provided by commercial edition of ALGLIB.
17355 
17356   -- ALGLIB routine --
17357      2009-2019
17358      Bochkanov Sergey
17359 *************************************************************************/
void cmatrixgemm(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     ae_complex alpha,
     /* Complex */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     /* Complex */ ae_matrix* b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     ae_complex beta,
     /* Complex */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_state *_state)
{
    ae_int_t ts;


    ts = matrixtilesizeb(_state);

    /*
     * Check input sizes for correctness
     * (fixed misspelled "incorect" in the C-size assertion messages)
     */
    ae_assert((optypea==0||optypea==1)||optypea==2, "CMatrixGEMM: incorrect OpTypeA (must be 0 or 1 or 2)", _state);
    ae_assert((optypeb==0||optypeb==1)||optypeb==2, "CMatrixGEMM: incorrect OpTypeB (must be 0 or 1 or 2)", _state);
    ae_assert(ic+m<=c->rows, "CMatrixGEMM: incorrect size of output matrix C", _state);
    ae_assert(jc+n<=c->cols, "CMatrixGEMM: incorrect size of output matrix C", _state);

    /*
     * Decide whether it is feasible to activate multithreading;
     * _trypexec_* returns ae_false in the GPL edition, in which case
     * we fall through to the serial recursive implementation.
     */
    if( (m>=2*ts||n>=2*ts)&&ae_fp_greater_eq(8*rmul3((double)(m), (double)(n), (double)(k), _state),smpactivationlevel(_state)) )
    {
        if( _trypexec_cmatrixgemm(m,n,k,alpha,a,ia,ja,optypea,b,ib,jb,optypeb,beta,c,ic,jc, _state) )
        {
            return;
        }
    }

    /*
     * Start actual work: cache-oblivious recursive GEMM kernel
     */
    ablas_cmatrixgemmrec(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
}
17407 
17408 
17409 /*************************************************************************
17410 Serial stub for GPL edition.
17411 *************************************************************************/
ae_bool _trypexec_cmatrixgemm(ae_int_t m,
    ae_int_t n,
    ae_int_t k,
    ae_complex alpha,
    /* Complex */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    /* Complex */ ae_matrix* b,
    ae_int_t ib,
    ae_int_t jb,
    ae_int_t optypeb,
    ae_complex beta,
    /* Complex */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_state *_state)
{
    /*
     * GPL edition has no parallel executor: always report "not executed"
     * so the caller falls back to the serial code path.
     */
    return ae_false;
}
17432 
17433 
17434 /*************************************************************************
17435 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
17436 * C is MxN general matrix
17437 * op1(A) is MxK matrix
17438 * op2(B) is KxN matrix
17439 * "op" may be identity transformation, transposition
17440 
17441 Additional info:
17442 * cache-oblivious algorithm is used.
17443 * multiplication result replaces C. If Beta=0, C elements are not used in
17444   calculations (not multiplied by zero - just not referenced)
17445 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
17446 * if both Beta and Alpha are zero, C is filled by zeros.
17447 
17448 IMPORTANT:
17449 
17450 This function does NOT preallocate output matrix C, it MUST be preallocated
17451 by caller prior to calling this function. In case C does not have  enough
17452 space to store result, exception will be generated.
17453 
17454 INPUT PARAMETERS
17455     M       -   matrix size, M>0
17456     N       -   matrix size, N>0
17457     K       -   matrix size, K>0
17458     Alpha   -   coefficient
17459     A       -   matrix
17460     IA      -   submatrix offset
17461     JA      -   submatrix offset
17462     OpTypeA -   transformation type:
17463                 * 0 - no transformation
17464                 * 1 - transposition
17465     B       -   matrix
17466     IB      -   submatrix offset
17467     JB      -   submatrix offset
17468     OpTypeB -   transformation type:
17469                 * 0 - no transformation
17470                 * 1 - transposition
17471     Beta    -   coefficient
17472     C       -   PREALLOCATED output matrix, large enough to store result
17473     IC      -   submatrix offset
17474     JC      -   submatrix offset
17475 
17476   ! FREE EDITION OF ALGLIB:
17477   !
17478   ! Free Edition of ALGLIB supports following important features for  this
17479   ! function:
17480   ! * C++ version: x64 SIMD support using C++ intrinsics
17481   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
17482   !
17483   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
17484   ! Reference Manual in order  to  find  out  how to activate SIMD support
17485   ! in ALGLIB.
17486 
17487   ! COMMERCIAL EDITION OF ALGLIB:
17488   !
17489   ! Commercial Edition of ALGLIB includes following important improvements
17490   ! of this function:
17491   ! * high-performance native backend with same C# interface (C# version)
17492   ! * multithreading support (C++ and C# versions)
17493   ! * hardware vendor (Intel) implementations of linear algebra primitives
17494   !   (C++ and C# versions, x86/x64 platform)
17495   !
17496   ! We recommend you to read 'Working with commercial version' section  of
17497   ! ALGLIB Reference Manual in order to find out how to  use  performance-
17498   ! related features provided by commercial edition of ALGLIB.
17499 
17500   -- ALGLIB routine --
17501      2009-2019
17502      Bochkanov Sergey
17503 *************************************************************************/
rmatrixgemm(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)17504 void rmatrixgemm(ae_int_t m,
17505      ae_int_t n,
17506      ae_int_t k,
17507      double alpha,
17508      /* Real    */ ae_matrix* a,
17509      ae_int_t ia,
17510      ae_int_t ja,
17511      ae_int_t optypea,
17512      /* Real    */ ae_matrix* b,
17513      ae_int_t ib,
17514      ae_int_t jb,
17515      ae_int_t optypeb,
17516      double beta,
17517      /* Real    */ ae_matrix* c,
17518      ae_int_t ic,
17519      ae_int_t jc,
17520      ae_state *_state)
17521 {
17522     ae_int_t ts;
17523 
17524 
17525     ts = matrixtilesizeb(_state);
17526 
17527     /*
17528      * Check input sizes for correctness
17529      */
17530     ae_assert(optypea==0||optypea==1, "RMatrixGEMM: incorrect OpTypeA (must be 0 or 1)", _state);
17531     ae_assert(optypeb==0||optypeb==1, "RMatrixGEMM: incorrect OpTypeB (must be 0 or 1)", _state);
17532     ae_assert(ic+m<=c->rows, "RMatrixGEMM: incorect size of output matrix C", _state);
17533     ae_assert(jc+n<=c->cols, "RMatrixGEMM: incorect size of output matrix C", _state);
17534 
17535     /*
17536      * Decide whether it is feasible to activate multithreading
17537      */
17538     if( (m>=2*ts||n>=2*ts)&&ae_fp_greater_eq(2*rmul3((double)(m), (double)(n), (double)(k), _state),smpactivationlevel(_state)) )
17539     {
17540         if( _trypexec_rmatrixgemm(m,n,k,alpha,a,ia,ja,optypea,b,ib,jb,optypeb,beta,c,ic,jc, _state) )
17541         {
17542             return;
17543         }
17544     }
17545 
17546     /*
17547      * Start actual work
17548      */
17549     ablas_rmatrixgemmrec(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
17550 }
17551 
17552 
17553 /*************************************************************************
17554 Serial stub for GPL edition.
17555 *************************************************************************/
_trypexec_rmatrixgemm(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)17556 ae_bool _trypexec_rmatrixgemm(ae_int_t m,
17557     ae_int_t n,
17558     ae_int_t k,
17559     double alpha,
17560     /* Real    */ ae_matrix* a,
17561     ae_int_t ia,
17562     ae_int_t ja,
17563     ae_int_t optypea,
17564     /* Real    */ ae_matrix* b,
17565     ae_int_t ib,
17566     ae_int_t jb,
17567     ae_int_t optypeb,
17568     double beta,
17569     /* Real    */ ae_matrix* c,
17570     ae_int_t ic,
17571     ae_int_t jc,
17572     ae_state *_state)
17573 {
17574     return ae_false;
17575 }
17576 
17577 
17578 /*************************************************************************
17579 This subroutine is an older version of CMatrixHERK(), one with wrong  name
17580 (it is HErmitian update, not SYmmetric). It  is  left  here  for  backward
17581 compatibility.
17582 
17583   -- ALGLIB routine --
17584      16.12.2009
17585      Bochkanov Sergey
17586 *************************************************************************/
cmatrixsyrk(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)17587 void cmatrixsyrk(ae_int_t n,
17588      ae_int_t k,
17589      double alpha,
17590      /* Complex */ ae_matrix* a,
17591      ae_int_t ia,
17592      ae_int_t ja,
17593      ae_int_t optypea,
17594      double beta,
17595      /* Complex */ ae_matrix* c,
17596      ae_int_t ic,
17597      ae_int_t jc,
17598      ae_bool isupper,
17599      ae_state *_state)
17600 {
17601 
17602 
17603     cmatrixherk(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state);
17604 }
17605 
17606 
17607 /*************************************************************************
17608 Performs one step  of stable Gram-Schmidt  process  on  vector  X[]  using
17609 set of orthonormal rows Q[].
17610 
17611 INPUT PARAMETERS:
17612     Q       -   array[M,N], matrix with orthonormal rows
17613     M, N    -   rows/cols
17614     X       -   array[N], vector to process
17615     NeedQX  -   whether we need QX or not
17616 
17617 OUTPUT PARAMETERS:
17618     X       -   stores X - Q'*(Q*X)
17619     QX      -   if NeedQX is True, array[M] filled with elements  of  Q*X,
17620                 reallocated if length is less than M.
17621                 Ignored otherwise.
17622 
17623   -- ALGLIB --
17624      Copyright 20.01.2020 by Bochkanov Sergey
17625 *************************************************************************/
rowwisegramschmidt(ae_matrix * q,ae_int_t m,ae_int_t n,ae_vector * x,ae_vector * qx,ae_bool needqx,ae_state * _state)17626 void rowwisegramschmidt(/* Real    */ ae_matrix* q,
17627      ae_int_t m,
17628      ae_int_t n,
17629      /* Real    */ ae_vector* x,
17630      /* Real    */ ae_vector* qx,
17631      ae_bool needqx,
17632      ae_state *_state)
17633 {
17634     ae_int_t i;
17635     double v;
17636 
17637 
17638     if( needqx )
17639     {
17640         rvectorsetlengthatleast(qx, m, _state);
17641     }
17642     for(i=0; i<=m-1; i++)
17643     {
17644         v = rdotvr(n, x, q, i, _state);
17645         raddrv(n, -v, q, i, x, _state);
17646         if( needqx )
17647         {
17648             qx->ptr.p_double[i] = v;
17649         }
17650     }
17651 }
17652 
17653 
17654 /*************************************************************************
17655 Complex ABLASSplitLength
17656 
17657   -- ALGLIB routine --
17658      15.12.2009
17659      Bochkanov Sergey
17660 *************************************************************************/
ablas_ablasinternalsplitlength(ae_int_t n,ae_int_t nb,ae_int_t * n1,ae_int_t * n2,ae_state * _state)17661 static void ablas_ablasinternalsplitlength(ae_int_t n,
17662      ae_int_t nb,
17663      ae_int_t* n1,
17664      ae_int_t* n2,
17665      ae_state *_state)
17666 {
17667     ae_int_t r;
17668 
17669     *n1 = 0;
17670     *n2 = 0;
17671 
17672     if( n<=nb )
17673     {
17674 
17675         /*
17676          * Block size, no further splitting
17677          */
17678         *n1 = n;
17679         *n2 = 0;
17680     }
17681     else
17682     {
17683 
17684         /*
17685          * Greater than block size
17686          */
17687         if( n%nb!=0 )
17688         {
17689 
17690             /*
17691              * Split remainder
17692              */
17693             *n2 = n%nb;
17694             *n1 = n-(*n2);
17695         }
17696         else
17697         {
17698 
17699             /*
17700              * Split on block boundaries
17701              */
17702             *n2 = n/2;
17703             *n1 = n-(*n2);
17704             if( *n1%nb==0 )
17705             {
17706                 return;
17707             }
17708             r = nb-*n1%nb;
17709             *n1 = *n1+r;
17710             *n2 = *n2-r;
17711         }
17712     }
17713 }
17714 
17715 
17716 /*************************************************************************
17717 Level 2 variant of CMatrixRightTRSM
17718 *************************************************************************/
/*
 * Level-2 kernel: in-place right triangular solve, X := X*op(A)^(-1).
 * A is an NxN triangular block at offset (I1,J1), X is an MxN block at
 * offset (I2,J2).  OpType: 0 - none, 1 - transpose, 2 - conjugate
 * transpose.  IsUpper selects the triangle, IsUnit treats the diagonal
 * of A as all-ones (diagonal entries are then never read).
 */
static void ablas_cmatrixrighttrsm2(ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Complex */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_complex vc;    /* running dot product / elimination multiplier */
    ae_complex vd;    /* diagonal divisor; stays 1 for unit-diagonal A */



    /*
     * Special case: empty X, nothing to solve
     */
    if( n*m==0 )
    {
        return;
    }

    /*
     * Try to call fast TRSM: if the optimized kernel handles the request
     * completely, we are done
     */
    if( cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
    {
        return;
    }

    /*
     * General case: each row of X is an independent right-hand side
     */
    if( isupper )
    {

        /*
         * Upper triangular matrix
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): forward substitution, columns left to right
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isunit )
                    {
                        vd = ae_complex_from_i(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd);
                    if( j<n-1 )
                    {
                        /* eliminate the solved column J from the trailing part of row I */
                        vc = x->ptr.pp_complex[i2+i][j2+j];
                        ae_v_csubc(&x->ptr.pp_complex[i2+i][j2+j+1], 1, &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1), vc);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): back substitution, columns right to left,
             * dot product against already-solved columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j<n-1 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "N", ae_v_len(j2+j+1,j2+n-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * X*A^(-H): same traversal as the transposed case, but with
             * conjugated elements of A
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j<n-1 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2+j+1], 1, "N", &a->ptr.pp_complex[i1+j][j1+j+1], 1, "Conj", ae_v_len(j2+j+1,j2+n-1));
                    }
                    if( !isunit )
                    {
                        vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state);
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
    }
    else
    {

        /*
         * Lower triangular matrix: mirror of the upper case with
         * reversed column traversal
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): substitution with columns right to left
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    if( isunit )
                    {
                        vd = ae_complex_from_i(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(x->ptr.pp_complex[i2+i][j2+j],vd);
                    if( j>0 )
                    {
                        /* eliminate the solved column J from the leading part of row I */
                        vc = x->ptr.pp_complex[i2+i][j2+j];
                        ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1), vc);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): columns left to right, dot product against
             * already-solved leading columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j>0 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "N", ae_v_len(j2,j2+j-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_complex[i1+j][j1+j];
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * X*A^(-H): conjugated variant of the transposed case
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    vc = ae_complex_from_i(0);
                    vd = ae_complex_from_i(1);
                    if( j>0 )
                    {
                        vc = ae_v_cdotproduct(&x->ptr.pp_complex[i2+i][j2], 1, "N", &a->ptr.pp_complex[i1+j][j1], 1, "Conj", ae_v_len(j2,j2+j-1));
                    }
                    if( !isunit )
                    {
                        vd = ae_c_conj(a->ptr.pp_complex[i1+j][j1+j], _state);
                    }
                    x->ptr.pp_complex[i2+i][j2+j] = ae_c_div(ae_c_sub(x->ptr.pp_complex[i2+i][j2+j],vc),vd);
                }
            }
            return;
        }
    }
}
17929 
17930 
17931 /*************************************************************************
17932 Level-2 subroutine
17933 *************************************************************************/
/*
 * Level-2 kernel: in-place left triangular solve, X := op(A)^(-1)*X.
 * A is an MxM triangular block at offset (I1,J1), X is an MxN block at
 * offset (I2,J2).  OpType: 0 - none, 1 - transpose, 2 - conjugate
 * transpose.  IsUpper selects the triangle, IsUnit treats the diagonal
 * of A as all-ones.
 */
static void ablas_cmatrixlefttrsm2(ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Complex */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_complex vc;    /* off-diagonal multiplier used for row elimination */
    ae_complex vd;    /* reciprocal of the diagonal element (or 1 for unit A) */



    /*
     * Special case: empty X, nothing to solve
     */
    if( n*m==0 )
    {
        return;
    }

    /*
     * Try to call fast TRSM: if the optimized kernel handles the request
     * completely, we are done
     */
    if( cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
    {
        return;
    }

    /*
     * General case: rows of X are updated with whole-row vector operations
     */
    if( isupper )
    {

        /*
         * Upper triangular matrix
         */
        if( optype==0 )
        {

            /*
             * A^(-1)*X: back substitution, rows bottom to top; subtract the
             * already-solved rows below, then scale by 1/A[i][i]
             */
            for(i=m-1; i>=0; i--)
            {
                for(j=i+1; j<=m-1; j++)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
                if( !isunit )
                {
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
                    ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * A^(-T)*X: rows top to bottom; scale row I first, then push
             * its contribution down to the rows below
             */
            for(i=0; i<=m-1; i++)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i+1; j<=m-1; j++)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * A^(-H)*X: same traversal as the transposed case, but with
             * conjugated elements of A
             */
            for(i=0; i<=m-1; i++)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state));
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i+1; j<=m-1; j++)
                {
                    vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state);
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
    }
    else
    {

        /*
         * Lower triangular matrix
         */
        if( optype==0 )
        {

            /*
             * A^(-1)*X: forward substitution, rows top to bottom
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+i][j2], 1, &x->ptr.pp_complex[i2+j][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    /* NOTE: the loop above exits with J==I, so this reads the diagonal element A[I][I] */
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+j][j1+j]);
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * A^(-T)*X: rows bottom to top; scale row I first, then push
             * its contribution up to the rows above
             */
            for(i=m-1; i>=0; i--)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,a->ptr.pp_complex[i1+i][j1+i]);
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i-1; j>=0; j--)
                {
                    vc = a->ptr.pp_complex[i1+i][j1+j];
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
        if( optype==2 )
        {

            /*
             * A^(-H)*X: conjugated variant of the transposed case
             */
            for(i=m-1; i>=0; i--)
            {
                if( isunit )
                {
                    vd = ae_complex_from_i(1);
                }
                else
                {
                    vd = ae_c_d_div(1,ae_c_conj(a->ptr.pp_complex[i1+i][j1+i], _state));
                }
                ae_v_cmulc(&x->ptr.pp_complex[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
                for(j=i-1; j>=0; j--)
                {
                    vc = ae_c_conj(a->ptr.pp_complex[i1+i][j1+j], _state);
                    ae_v_csubc(&x->ptr.pp_complex[i2+j][j2], 1, &x->ptr.pp_complex[i2+i][j2], 1, "N", ae_v_len(j2,j2+n-1), vc);
                }
            }
            return;
        }
    }
}
18134 
18135 
18136 /*************************************************************************
18137 Level 2 subroutine
18138 
18139   -- ALGLIB routine --
18140      15.12.2009
18141      Bochkanov Sergey
18142 *************************************************************************/
/*
 * Level-2 kernel: in-place right triangular solve, X := X*op(A)^(-1),
 * real version.  A is an NxN triangular block at offset (I1,J1), X is an
 * MxN block at offset (I2,J2).  OpType: 0 - none, 1 - transpose.
 * IsUpper selects the triangle, IsUnit treats the diagonal of A as ones.
 */
static void ablas_rmatrixrighttrsm2(ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* a,
     ae_int_t i1,
     ae_int_t j1,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Real    */ ae_matrix* x,
     ae_int_t i2,
     ae_int_t j2,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double vr;    /* running dot product / elimination multiplier */
    double vd;    /* diagonal divisor; stays 1 for unit-diagonal A */



    /*
     * Special case: empty X, nothing to solve
     */
    if( n*m==0 )
    {
        return;
    }

    /*
     * Try to use "fast" code: if the optimized kernel handles the request
     * completely, we are done
     */
    if( rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
    {
        return;
    }

    /*
     * General case: each row of X is an independent right-hand side
     */
    if( isupper )
    {

        /*
         * Upper triangular matrix
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): forward substitution, columns left to right
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isunit )
                    {
                        vd = (double)(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd;
                    if( j<n-1 )
                    {
                        /* eliminate the solved column J from the trailing part of row I */
                        vr = x->ptr.pp_double[i2+i][j2+j];
                        ae_v_subd(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1), vr);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): back substitution, columns right to left,
             * dot product against already-solved columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    vr = (double)(0);
                    vd = (double)(1);
                    if( j<n-1 )
                    {
                        vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2+j+1], 1, &a->ptr.pp_double[i1+j][j1+j+1], 1, ae_v_len(j2+j+1,j2+n-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd;
                }
            }
            return;
        }
    }
    else
    {

        /*
         * Lower triangular matrix: mirror of the upper case with
         * reversed column traversal
         */
        if( optype==0 )
        {

            /*
             * X*A^(-1): substitution with columns right to left
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=n-1; j>=0; j--)
                {
                    if( isunit )
                    {
                        vd = (double)(1);
                    }
                    else
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = x->ptr.pp_double[i2+i][j2+j]/vd;
                    if( j>0 )
                    {
                        /* eliminate the solved column J from the leading part of row I */
                        vr = x->ptr.pp_double[i2+i][j2+j];
                        ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1), vr);
                    }
                }
            }
            return;
        }
        if( optype==1 )
        {

            /*
             * X*A^(-T): columns left to right, dot product against
             * already-solved leading columns
             */
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    vr = (double)(0);
                    vd = (double)(1);
                    if( j>0 )
                    {
                        vr = ae_v_dotproduct(&x->ptr.pp_double[i2+i][j2], 1, &a->ptr.pp_double[i1+j][j1], 1, ae_v_len(j2,j2+j-1));
                    }
                    if( !isunit )
                    {
                        vd = a->ptr.pp_double[i1+j][j1+j];
                    }
                    x->ptr.pp_double[i2+i][j2+j] = (x->ptr.pp_double[i2+i][j2+j]-vr)/vd;
                }
            }
            return;
        }
    }
}
18303 
18304 
18305 /*************************************************************************
18306 Level 2 subroutine
18307 *************************************************************************/
ablas_rmatrixlefttrsm2(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)18308 static void ablas_rmatrixlefttrsm2(ae_int_t m,
18309      ae_int_t n,
18310      /* Real    */ ae_matrix* a,
18311      ae_int_t i1,
18312      ae_int_t j1,
18313      ae_bool isupper,
18314      ae_bool isunit,
18315      ae_int_t optype,
18316      /* Real    */ ae_matrix* x,
18317      ae_int_t i2,
18318      ae_int_t j2,
18319      ae_state *_state)
18320 {
18321     ae_int_t i;
18322     ae_int_t j;
18323     double vr;
18324     double vd;
18325 
18326 
18327 
18328     /*
18329      * Special case
18330      */
18331     if( n==0||m==0 )
18332     {
18333         return;
18334     }
18335 
18336     /*
18337      * Try fast code
18338      */
18339     if( rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2, _state) )
18340     {
18341         return;
18342     }
18343 
18344     /*
18345      * General case
18346      */
18347     if( isupper )
18348     {
18349 
18350         /*
18351          * Upper triangular matrix
18352          */
18353         if( optype==0 )
18354         {
18355 
18356             /*
18357              * A^(-1)*X
18358              */
18359             for(i=m-1; i>=0; i--)
18360             {
18361                 for(j=i+1; j<=m-1; j++)
18362                 {
18363                     vr = a->ptr.pp_double[i1+i][j1+j];
18364                     ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr);
18365                 }
18366                 if( !isunit )
18367                 {
18368                     vd = 1/a->ptr.pp_double[i1+i][j1+i];
18369                     ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18370                 }
18371             }
18372             return;
18373         }
18374         if( optype==1 )
18375         {
18376 
18377             /*
18378              * A^(-T)*X
18379              */
18380             for(i=0; i<=m-1; i++)
18381             {
18382                 if( isunit )
18383                 {
18384                     vd = (double)(1);
18385                 }
18386                 else
18387                 {
18388                     vd = 1/a->ptr.pp_double[i1+i][j1+i];
18389                 }
18390                 ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18391                 for(j=i+1; j<=m-1; j++)
18392                 {
18393                     vr = a->ptr.pp_double[i1+i][j1+j];
18394                     ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr);
18395                 }
18396             }
18397             return;
18398         }
18399     }
18400     else
18401     {
18402 
18403         /*
18404          * Lower triangular matrix
18405          */
18406         if( optype==0 )
18407         {
18408 
18409             /*
18410              * A^(-1)*X
18411              */
18412             for(i=0; i<=m-1; i++)
18413             {
18414                 for(j=0; j<=i-1; j++)
18415                 {
18416                     vr = a->ptr.pp_double[i1+i][j1+j];
18417                     ae_v_subd(&x->ptr.pp_double[i2+i][j2], 1, &x->ptr.pp_double[i2+j][j2], 1, ae_v_len(j2,j2+n-1), vr);
18418                 }
18419                 if( isunit )
18420                 {
18421                     vd = (double)(1);
18422                 }
18423                 else
18424                 {
18425                     vd = 1/a->ptr.pp_double[i1+j][j1+j];
18426                 }
18427                 ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18428             }
18429             return;
18430         }
18431         if( optype==1 )
18432         {
18433 
18434             /*
18435              * A^(-T)*X
18436              */
18437             for(i=m-1; i>=0; i--)
18438             {
18439                 if( isunit )
18440                 {
18441                     vd = (double)(1);
18442                 }
18443                 else
18444                 {
18445                     vd = 1/a->ptr.pp_double[i1+i][j1+i];
18446                 }
18447                 ae_v_muld(&x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vd);
18448                 for(j=i-1; j>=0; j--)
18449                 {
18450                     vr = a->ptr.pp_double[i1+i][j1+j];
18451                     ae_v_subd(&x->ptr.pp_double[i2+j][j2], 1, &x->ptr.pp_double[i2+i][j2], 1, ae_v_len(j2,j2+n-1), vr);
18452                 }
18453             }
18454             return;
18455         }
18456     }
18457 }
18458 
18459 
/*************************************************************************
Level 2 subroutine: Hermitian rank-K update

    C := alpha*A*A^H + beta*C   (OpTypeA=0)
or
    C := alpha*A^H*A + beta*C   (OpTypeA<>0)

where C is an NxN submatrix of C stored at offset [IC,JC] (only the
triangle selected by IsUpper is referenced and updated), and A is a
submatrix stored at offset [IA,JA].
*************************************************************************/
static void ablas_cmatrixherk2(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Complex */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Complex */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    ae_complex v;



    /*
     * Fast exit (nothing to be done): the alpha-term vanishes and Beta=1
     */
    if( (ae_fp_eq(alpha,(double)(0))||k==0)&&ae_fp_eq(beta,(double)(1)) )
    {
        return;
    }

    /*
     * Try to call fast SYRK kernel; fall through to generic code on failure
     */
    if( cmatrixherkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
    {
        return;
    }

    /*
     * SYRK, generic code
     */
    if( optypea==0 )
    {

        /*
         * C=alpha*A*A^H+beta*C: element [I][J] of the selected triangle is
         * the dot product of row I of A with the conjugated row J of A
         */
        for(i=0; i<=n-1; i++)
        {
            /* [J1,J2] is the column range of the triangle within row I */
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            for(j=j1; j<=j2; j++)
            {
                if( ae_fp_neq(alpha,(double)(0))&&k>0 )
                {
                    v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+i][ja], 1, "N", &a->ptr.pp_complex[ia+j][ja], 1, "Conj", ae_v_len(ja,ja+k-1));
                }
                else
                {
                    v = ae_complex_from_i(0);
                }
                /* Beta=0 overwrites C (old contents are not read) */
                if( ae_fp_eq(beta,(double)(0)) )
                {
                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul_d(v,alpha);
                }
                else
                {
                    c->ptr.pp_complex[ic+i][jc+j] = ae_c_add(ae_c_mul_d(c->ptr.pp_complex[ic+i][jc+j],beta),ae_c_mul_d(v,alpha));
                }
            }
        }
        return;
    }
    else
    {

        /*
         * C=alpha*A^H*A+beta*C: first pre-scale the triangle of C by Beta
         * (or zero it), then accumulate K rank-1 updates, one per row of A
         */
        for(i=0; i<=n-1; i++)
        {
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            if( ae_fp_eq(beta,(double)(0)) )
            {
                for(j=j1; j<=j2; j++)
                {
                    c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_i(0);
                }
            }
            else
            {
                ae_v_cmuld(&c->ptr.pp_complex[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta);
            }
        }
        if( ae_fp_neq(alpha,(double)(0))&&k>0 )
        {
            for(i=0; i<=k-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isupper )
                    {
                        j1 = j;
                        j2 = n-1;
                    }
                    else
                    {
                        j1 = 0;
                        j2 = j;
                    }
                    /* row A[IA+i] contributes alpha*conj(A[i][j])*A[i][J1..J2]
                       to row J of the triangle of C */
                    v = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[ia+i][ja+j], _state),alpha);
                    ae_v_caddc(&c->ptr.pp_complex[ic+j][jc+j1], 1, &a->ptr.pp_complex[ia+i][ja+j1], 1, "N", ae_v_len(jc+j1,jc+j2), v);
                }
            }
        }
        return;
    }
}
18598 
18599 
/*************************************************************************
Level 2 subroutine: symmetric rank-K update

    C := alpha*A*A^T + beta*C   (OpTypeA=0)
or
    C := alpha*A^T*A + beta*C   (OpTypeA<>0)

where C is an NxN submatrix of C stored at offset [IC,JC] (only the
triangle selected by IsUpper is referenced and updated), and A is a
submatrix stored at offset [IA,JA].
*************************************************************************/
static void ablas_rmatrixsyrk2(ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     double beta,
     /* Real    */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double v;



    /*
     * Fast exit (nothing to be done): the alpha-term vanishes and Beta=1
     */
    if( (ae_fp_eq(alpha,(double)(0))||k==0)&&ae_fp_eq(beta,(double)(1)) )
    {
        return;
    }

    /*
     * Try to call fast SYRK kernel; fall through to generic code on failure
     */
    if( rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper, _state) )
    {
        return;
    }

    /*
     * SYRK, generic code
     */
    if( optypea==0 )
    {

        /*
         * C=alpha*A*A^T+beta*C: element [I][J] of the selected triangle is
         * the dot product of rows I and J of A
         */
        for(i=0; i<=n-1; i++)
        {
            /* [J1,J2] is the column range of the triangle within row I */
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            for(j=j1; j<=j2; j++)
            {
                if( ae_fp_neq(alpha,(double)(0))&&k>0 )
                {
                    v = ae_v_dotproduct(&a->ptr.pp_double[ia+i][ja], 1, &a->ptr.pp_double[ia+j][ja], 1, ae_v_len(ja,ja+k-1));
                }
                else
                {
                    v = (double)(0);
                }
                /* Beta=0 overwrites C (old contents are not read) */
                if( ae_fp_eq(beta,(double)(0)) )
                {
                    c->ptr.pp_double[ic+i][jc+j] = alpha*v;
                }
                else
                {
                    c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j]+alpha*v;
                }
            }
        }
        return;
    }
    else
    {

        /*
         * C=alpha*A^T*A+beta*C: first pre-scale the triangle of C by Beta
         * (or zero it), then accumulate K rank-1 updates, one per row of A
         */
        for(i=0; i<=n-1; i++)
        {
            if( isupper )
            {
                j1 = i;
                j2 = n-1;
            }
            else
            {
                j1 = 0;
                j2 = i;
            }
            if( ae_fp_eq(beta,(double)(0)) )
            {
                for(j=j1; j<=j2; j++)
                {
                    c->ptr.pp_double[ic+i][jc+j] = (double)(0);
                }
            }
            else
            {
                ae_v_muld(&c->ptr.pp_double[ic+i][jc+j1], 1, ae_v_len(jc+j1,jc+j2), beta);
            }
        }
        if( ae_fp_neq(alpha,(double)(0))&&k>0 )
        {
            for(i=0; i<=k-1; i++)
            {
                for(j=0; j<=n-1; j++)
                {
                    if( isupper )
                    {
                        j1 = j;
                        j2 = n-1;
                    }
                    else
                    {
                        j1 = 0;
                        j2 = j;
                    }
                    /* row A[IA+i] contributes alpha*A[i][j]*A[i][J1..J2]
                       to row J of the triangle of C */
                    v = alpha*a->ptr.pp_double[ia+i][ja+j];
                    ae_v_addd(&c->ptr.pp_double[ic+j][jc+j1], 1, &a->ptr.pp_double[ia+i][ja+j1], 1, ae_v_len(jc+j1,jc+j2), v);
                }
            }
        }
        return;
    }
}
18738 
18739 
18740 /*************************************************************************
18741 This subroutine is an actual implementation of CMatrixGEMM.  It  does  not
18742 perform some integrity checks performed in the  driver  function,  and  it
18743 does not activate multithreading  framework  (driver  decides  whether  to
18744 activate workers or not).
18745 
18746   -- ALGLIB routine --
18747      10.01.2019
18748      Bochkanov Sergey
18749 *************************************************************************/
ablas_cmatrixgemmrec(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)18750 static void ablas_cmatrixgemmrec(ae_int_t m,
18751      ae_int_t n,
18752      ae_int_t k,
18753      ae_complex alpha,
18754      /* Complex */ ae_matrix* a,
18755      ae_int_t ia,
18756      ae_int_t ja,
18757      ae_int_t optypea,
18758      /* Complex */ ae_matrix* b,
18759      ae_int_t ib,
18760      ae_int_t jb,
18761      ae_int_t optypeb,
18762      ae_complex beta,
18763      /* Complex */ ae_matrix* c,
18764      ae_int_t ic,
18765      ae_int_t jc,
18766      ae_state *_state)
18767 {
18768     ae_int_t s1;
18769     ae_int_t s2;
18770     ae_int_t tsa;
18771     ae_int_t tsb;
18772     ae_int_t tscur;
18773 
18774 
18775 
18776     /*
18777      * Tile hierarchy: B -> A -> A/2
18778      */
18779     tsa = matrixtilesizea(_state)/2;
18780     tsb = matrixtilesizeb(_state);
18781     tscur = tsb;
18782     if( imax3(m, n, k, _state)<=tsb )
18783     {
18784         tscur = tsa;
18785     }
18786     ae_assert(tscur>=1, "CMatrixGEMMRec: integrity check failed", _state);
18787 
18788     /*
18789      * Use MKL or ALGLIB basecase code
18790      */
18791     if( imax3(m, n, k, _state)<=tsb )
18792     {
18793         if( cmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
18794         {
18795             return;
18796         }
18797     }
18798     if( imax3(m, n, k, _state)<=tsa )
18799     {
18800         cmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18801         return;
18802     }
18803 
18804     /*
18805      * Recursive algorithm: parallel splitting on M/N
18806      */
18807     if( m>=n&&m>=k )
18808     {
18809 
18810         /*
18811          * A*B = (A1 A2)^T*B
18812          */
18813         tiledsplit(m, tscur, &s1, &s2, _state);
18814         ablas_cmatrixgemmrec(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18815         if( optypea==0 )
18816         {
18817             ablas_cmatrixgemmrec(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
18818         }
18819         else
18820         {
18821             ablas_cmatrixgemmrec(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
18822         }
18823         return;
18824     }
18825     if( n>=m&&n>=k )
18826     {
18827 
18828         /*
18829          * A*B = A*(B1 B2)
18830          */
18831         tiledsplit(n, tscur, &s1, &s2, _state);
18832         if( optypeb==0 )
18833         {
18834             ablas_cmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18835             ablas_cmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state);
18836         }
18837         else
18838         {
18839             ablas_cmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18840             ablas_cmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state);
18841         }
18842         return;
18843     }
18844 
18845     /*
18846      * Recursive algorithm: serial splitting on K
18847      */
18848 
18849     /*
18850      * A*B = (A1 A2)*(B1 B2)^T
18851      */
18852     tiledsplit(k, tscur, &s1, &s2, _state);
18853     if( optypea==0&&optypeb==0 )
18854     {
18855         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18856         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18857     }
18858     if( optypea==0&&optypeb!=0 )
18859     {
18860         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18861         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18862     }
18863     if( optypea!=0&&optypeb==0 )
18864     {
18865         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18866         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18867     }
18868     if( optypea!=0&&optypeb!=0 )
18869     {
18870         ablas_cmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
18871         ablas_cmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, ae_complex_from_d(1.0), c, ic, jc, _state);
18872     }
18873 }
18874 
18875 
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_ablas_cmatrixgemmrec(ae_int_t m,
    ae_int_t n,
    ae_int_t k,
    ae_complex alpha,
    /* Complex */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    /* Complex */ ae_matrix* b,
    ae_int_t ib,
    ae_int_t jb,
    ae_int_t optypeb,
    ae_complex beta,
    /* Complex */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_state *_state)
{
    /*
     * Always reports "not executed in parallel"; the caller then runs
     * the serial implementation (parallel execution is unavailable in
     * this edition).
     */
    return ae_false;
}
18899 
18900 
/*************************************************************************
This subroutine is an actual implementation of RMatrixGEMM.  It  does  not
perform some integrity checks performed in the  driver  function,  and  it
does not activate multithreading  framework  (driver  decides  whether  to
activate workers or not).

The problem is recursively split along its largest dimension until it fits
into a basecase tile.

  -- ALGLIB routine --
     10.01.2019
     Bochkanov Sergey
*************************************************************************/
static void ablas_rmatrixgemmrec(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real    */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     /* Real    */ ae_matrix* b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     double beta,
     /* Real    */ ae_matrix* c,
     ae_int_t ic,
     ae_int_t jc,
     ae_state *_state)
{
    ae_int_t s1;
    ae_int_t s2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    /*
     * TSB is the larger (MKL-friendly) tile, TSA the smaller basecase
     * tile; TSCur is the split size used at this recursion level.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( imax3(m, n, k, _state)<=tsb )
    {
        tscur = tsa;
    }
    ae_assert(tscur>=1, "RMatrixGEMMRec: integrity check failed", _state);

    /*
     * Use MKL or ALGLIB basecase code
     */
    if( (m<=tsb&&n<=tsb)&&k<=tsb )
    {
        if( rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
        {
            return;
        }
    }
    if( (m<=tsa&&n<=tsa)&&k<=tsa )
    {
        rmatrixgemmk(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        return;
    }

    /*
     * Recursive algorithm: split on M or N
     */
    if( m>=n&&m>=k )
    {

        /*
         * Split on M: the two halves update disjoint row blocks of C.
         * The offset into A depends on whether A is transposed.
         */
        tiledsplit(m, tscur, &s1, &s2, _state);
        if( optypea==0 )
        {
            ablas_rmatrixgemmrec(s2, n, k, alpha, a, ia+s1, ja, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
            ablas_rmatrixgemmrec(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        else
        {
            ablas_rmatrixgemmrec(s2, n, k, alpha, a, ia, ja+s1, optypea, b, ib, jb, optypeb, beta, c, ic+s1, jc, _state);
            ablas_rmatrixgemmrec(s1, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        return;
    }
    if( n>=m&&n>=k )
    {

        /*
         * Split on N: the two halves update disjoint column blocks of C.
         * The offset into B depends on whether B is transposed.
         */
        tiledsplit(n, tscur, &s1, &s2, _state);
        if( optypeb==0 )
        {
            ablas_rmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib, jb+s1, optypeb, beta, c, ic, jc+s1, _state);
            ablas_rmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        else
        {
            ablas_rmatrixgemmrec(m, s2, k, alpha, a, ia, ja, optypea, b, ib+s1, jb, optypeb, beta, c, ic, jc+s1, _state);
            ablas_rmatrixgemmrec(m, s1, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        }
        return;
    }

    /*
     * Recursive algorithm: split on K.  Both halves update the whole of
     * C, so the second call must run after the first and accumulate with
     * Beta=1 (the first call has already applied the caller's Beta).
     */
    tiledsplit(k, tscur, &s1, &s2, _state);
    if( optypea==0&&optypeb==0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state);
    }
    if( optypea==0&&optypeb!=0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia, ja+s1, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state);
    }
    if( optypea!=0&&optypeb==0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib+s1, jb, optypeb, 1.0, c, ic, jc, _state);
    }
    if( optypea!=0&&optypeb!=0 )
    {
        ablas_rmatrixgemmrec(m, n, s1, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state);
        ablas_rmatrixgemmrec(m, n, s2, alpha, a, ia+s1, ja, optypea, b, ib, jb+s1, optypeb, 1.0, c, ic, jc, _state);
    }
}
19032 
19033 
/*************************************************************************
Serial stub for GPL edition.
*************************************************************************/
ae_bool _trypexec_ablas_rmatrixgemmrec(ae_int_t m,
    ae_int_t n,
    ae_int_t k,
    double alpha,
    /* Real    */ ae_matrix* a,
    ae_int_t ia,
    ae_int_t ja,
    ae_int_t optypea,
    /* Real    */ ae_matrix* b,
    ae_int_t ib,
    ae_int_t jb,
    ae_int_t optypeb,
    double beta,
    /* Real    */ ae_matrix* c,
    ae_int_t ic,
    ae_int_t jc,
    ae_state *_state)
{
    /*
     * Always reports "not executed in parallel"; the caller then runs
     * the serial implementation (parallel execution is unavailable in
     * this edition).
     */
    return ae_false;
}
19057 
19058 
19059 #endif
19060 #if defined(AE_COMPILE_ORTFAC) || !defined(AE_PARTIAL_BUILD)
19061 
19062 
19063 /*************************************************************************
19064 QR decomposition of a rectangular matrix of size MxN
19065 
19066 Input parameters:
19067     A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
19068     M   -   number of rows in matrix A.
19069     N   -   number of columns in matrix A.
19070 
19071 Output parameters:
19072     A   -   matrices Q and R in compact form (see below).
19073     Tau -   array of scalar factors which are used to form
19074             matrix Q. Array whose index ranges within [0.. Min(M-1,N-1)].
19075 
19076 Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
19077 MxM, R - upper triangular (or upper trapezoid) matrix of size M x N.
19078 
19079 The elements of matrix R are located on and above the main diagonal of
19080 matrix A. The elements which are located in Tau array and below the main
19081 diagonal of matrix A are used to form matrix Q as follows:
19082 
19083 Matrix Q is represented as a product of elementary reflections
19084 
Q = H(0)*H(1)*...*H(k-1),
19086 
19087 where k = min(m,n), and each H(i) is in the form
19088 
19089 H(i) = 1 - tau * v * (v^T)
19090 
19091 where tau is a scalar stored in Tau[I]; v - real vector,
19092 so that v(0:i-1) = 0, v(i) = 1, v(i+1:m-1) stored in A(i+1:m-1,i).
19093 
19094   ! FREE EDITION OF ALGLIB:
19095   !
19096   ! Free Edition of ALGLIB supports following important features for  this
19097   ! function:
19098   ! * C++ version: x64 SIMD support using C++ intrinsics
19099   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
19100   !
19101   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
19102   ! Reference Manual in order  to  find  out  how to activate SIMD support
19103   ! in ALGLIB.
19104 
19105   ! COMMERCIAL EDITION OF ALGLIB:
19106   !
19107   ! Commercial Edition of ALGLIB includes following important improvements
19108   ! of this function:
19109   ! * high-performance native backend with same C# interface (C# version)
19110   ! * multithreading support (C++ and C# versions)
19111   ! * hardware vendor (Intel) implementations of linear algebra primitives
19112   !   (C++ and C# versions, x86/x64 platform)
19113   !
19114   ! We recommend you to read 'Working with commercial version' section  of
19115   ! ALGLIB Reference Manual in order to find out how to  use  performance-
19116   ! related features provided by commercial edition of ALGLIB.
19117 
19118   -- ALGLIB routine --
19119      17.02.2010
19120      Bochkanov Sergey
19121 *************************************************************************/
void rmatrixqr(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t ts;

    /*
     * Register locals in the dynamic frame; structures are zero-filled
     * before initialization, per the memory-management protocol used
     * throughout this file.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    /* Degenerate sizes: nothing to factor */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /*
     * TS is the column width of one block; TmpA holds the current panel,
     * TmpT the triangular factor of the block reflector, TmpR scratch
     * for the trailing-matrix update.
     */
    minmn = ae_minint(m, n, _state);
    ts = matrixtilesizeb(_state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, n, _state);

    /*
     * Blocked code: panels of up to TS columns are factored one at a time
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        rowscount = m-blockstart;

        /*
         * QR decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        rmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state);
        rmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        /* copy panel's scalar factors into the output TAU */
        ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=n-1 )
        {
            if( n-blockstart-blocksize>=2*ts||rowscount>=4*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
                 */
                rmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, 1.0, &tmpa, 0, 0, 1, a, blockstart, blockstart+blocksize, 0, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, 1.0, &tmpt, 0, 0, 1, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state);
                rmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, a, blockstart, blockstart+blocksize, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply the reflectors one at a time.
                 * T[1..rowscount-i] holds the reflector vector V, with
                 * the implicit leading 1 written explicitly.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheleft(a, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19249 
19250 
/*************************************************************************
LQ decomposition of a rectangular matrix of size MxN

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1].
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices L and Q in compact form (see below)
    Tau -   array of scalar factors which are used to form
            matrix Q. Array whose index ranges within [0..Min(M,N)-1].

Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
MxM, L - lower triangular (or lower trapezoid) matrix of size M x N.

The elements of matrix L are located on and below  the  main  diagonal  of
matrix A. The elements which are located in Tau array and above  the  main
diagonal of matrix A are used to form matrix Q as follows:

Matrix Q is represented as a product of elementary reflections

Q = H(k-1)*H(k-2)*...*H(1)*H(0),

where k = min(m,n), and each H(i) is of the form

H(i) = 1 - tau * v * (v^T)

where tau is a scalar stored in Tau[I]; v - real vector, so that v(0:i-1)=0,
v(i) = 1, v(i+1:n-1) stored in A(i,i+1:n-1).

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixlq(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    /*
     * Degenerate sizes: nothing to factor, leave Tau cleared.
     */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    minmn = ae_minint(m, n, _state);
    ts = matrixtilesizeb(_state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, m, 2*ts, _state);

    /*
     * Blocked code
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        columnscount = n-blockstart;

        /*
         * LQ decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        rmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state);
        rmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        ae_v_move(&tau->ptr.p_double[blockstart], 1, &taubuf.ptr.p_double[0], 1, ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=m-1 )
        {
            if( m-blockstart-blocksize>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
                 */
                rmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, 1.0, a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, 0.0, &tmpr, 0, blocksize, _state);
                rmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, a, blockstart+blocksize, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time
                 * from the right to the rows below the current panel.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    /* reflector i lives in row i of TmpA, starting at the diagonal */
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheright(a, taubuf.ptr.p_double[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19436 
19437 
/*************************************************************************
QR decomposition of a rectangular complex matrix of size MxN

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices Q and R in compact form
    Tau -   array of scalar factors which are used to form matrix Q. Array
            whose indexes range within [0.. Min(M,N)-1]

Matrix A is represented as A = QR, where Q is an orthogonal matrix of size
MxM, R - upper triangular (or upper trapezoid) matrix of size MxN.

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994
*************************************************************************/
void cmatrixqr(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Degenerate sizes: nothing to factor, leave Tau cleared.
     */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /*
     * NOTE(review): tile size is half of the real-valued routine's,
     * presumably because complex entries take twice the memory --
     * confirm against matrixtilesizeb() documentation.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, n, _state);

    /*
     * Blocked code
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        rowscount = m-blockstart;

        /*
         * QR decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        ortfac_cmatrixqrbasecase(&tmpa, rowscount, blocksize, &work, &t, &taubuf, _state);
        cmatrixcopy(rowscount, blocksize, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=n-1 )
        {
            if( n-blockstart-blocksize>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 * Q' = E + Y*T'*Y' = E + TmpA*TmpT'*TmpA'
                 *
                 * (optype 2 below is presumably conjugate-transpose --
                 * confirm against cmatrixgemm documentation)
                 */
                cmatrixgemm(blocksize, n-blockstart-blocksize, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, a, blockstart, blockstart+blocksize, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(blocksize, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 2, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state);
                cmatrixgemm(rowscount, n-blockstart-blocksize, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), a, blockstart, blockstart+blocksize, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply conjugated reflectors one at
                 * a time from the left to the trailing columns.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    /* reflector i lives in column i of TmpA, starting at the diagonal */
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheleft(a, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, blockstart+i, m-1, blockstart+blocksize, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19610 
19611 
/*************************************************************************
LQ decomposition of a rectangular complex matrix of size MxN

Input parameters:
    A   -   matrix A whose indexes range within [0..M-1, 0..N-1]
    M   -   number of rows in matrix A.
    N   -   number of columns in matrix A.

Output parameters:
    A   -   matrices Q and L in compact form
    Tau -   array of scalar factors which are used to form matrix Q. Array
            whose indexes range within [0.. Min(M,N)-1]

Matrix A is represented as A = LQ, where Q is an orthogonal matrix of size
MxM, L - lower triangular (or lower trapezoid) matrix of size MxN.

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     September 30, 1994
*************************************************************************/
void cmatrixlq(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_vector_clear(tau);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Degenerate sizes: nothing to factor, leave Tau cleared.
     */
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /*
     * NOTE(review): tile size is half of the real-valued routine's,
     * presumably because complex entries take twice the memory --
     * confirm against matrixtilesizeb() documentation.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(tau, minmn, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, m, 2*ts, _state);

    /*
     * Blocked code
     */
    blockstart = 0;
    while(blockstart!=minmn)
    {

        /*
         * Determine block size
         */
        blocksize = minmn-blockstart;
        if( blocksize>ts )
        {
            blocksize = ts;
        }
        columnscount = n-blockstart;

        /*
         * LQ decomposition of submatrix.
         * Matrix is copied to temporary storage to solve
         * some TLB issues arising from non-contiguous memory
         * access pattern.
         */
        cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
        ortfac_cmatrixlqbasecase(&tmpa, blocksize, columnscount, &work, &t, &taubuf, _state);
        cmatrixcopy(blocksize, columnscount, &tmpa, 0, 0, a, blockstart, blockstart, _state);
        ae_v_cmove(&tau->ptr.p_complex[blockstart], 1, &taubuf.ptr.p_complex[0], 1, "N", ae_v_len(blockstart,blockstart+blocksize-1));

        /*
         * Update the rest, choose between:
         * a) Level 2 algorithm (when the rest of the matrix is small enough)
         * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
         *    representation for products of Householder transformations',
         *    by R. Schreiber and C. Van Loan.
         */
        if( blockstart+blocksize<=m-1 )
        {
            if( m-blockstart-blocksize>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA'*TmpT*TmpA
                 *
                 * (optype 2 below is presumably conjugate-transpose --
                 * confirm against cmatrixgemm documentation)
                 */
                cmatrixgemm(m-blockstart-blocksize, blocksize, columnscount, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(m-blockstart-blocksize, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state);
                cmatrixgemm(m-blockstart-blocksize, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), a, blockstart+blocksize, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time
                 * from the right to the rows below the current panel.
                 */
                for(i=0; i<=blocksize-1; i++)
                {
                    /* reflector i lives in row i of TmpA (conjugated on copy), starting at the diagonal */
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheright(a, taubuf.ptr.p_complex[i], &t, blockstart+blocksize, m-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance
         */
        blockstart = blockstart+blocksize;
    }
    ae_frame_leave(_state);
}
19783 
19784 
/*************************************************************************
Partial unpacking of matrix Q from the QR decomposition of a matrix A

Input parameters:
    A       -   matrices Q and R in compact form.
                Output of RMatrixQR subroutine.
    M       -   number of rows in given matrix A. M>=0.
    N       -   number of columns in given matrix A. N>=0.
    Tau     -   scalar factors which are used to form Q.
                Output of the RMatrixQR subroutine.
    QColumns -  required number of columns of matrix Q. M>=QColumns>=0.

Output parameters:
    Q       -   first QColumns columns of matrix Q.
                Array whose indexes range within [0..M-1, 0..QColumns-1].
                If QColumns=0, the array remains unchanged.

  ! FREE EDITION OF ALGLIB:
  !
  ! Free Edition of ALGLIB supports following important features for  this
  ! function:
  ! * C++ version: x64 SIMD support using C++ intrinsics
  ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
  !
  ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
  ! Reference Manual in order  to  find  out  how to activate SIMD support
  ! in ALGLIB.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  ! * hardware vendor (Intel) implementations of linear algebra primitives
  !   (C++ and C# versions, x86/x64 platform)
  !
  ! We recommend you to read 'Working with commercial version' section  of
  ! ALGLIB Reference Manual in order to find out how to  use  performance-
  ! related features provided by commercial edition of ALGLIB.

  -- ALGLIB routine --
     17.02.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixqrunpackq(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_int_t qcolumns,
     /* Real    */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;             /* scratch used by reflector application        */
    ae_vector t;                /* holds one Householder vector at a time       */
    ae_vector taubuf;           /* per-block copy of the tau coefficients       */
    ae_int_t minmn;
    ae_int_t refcnt;            /* number of reflectors actually applied        */
    ae_matrix tmpa;             /* contiguous copy of the current panel         */
    ae_matrix tmpt;             /* triangular factor T of the WY representation */
    ae_matrix tmpr;             /* intermediate products for the blocked update */
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;                /* tile (panel) size for the blocked algorithm  */

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state);
    if( (m<=0||n<=0)||qcolumns<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init: start from the identity and apply reflectors to it
     */
    ts = matrixtilesizeb(_state);
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qcolumns, _state);
    ae_matrix_set_length(q, m, qcolumns, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=qcolumns-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_double[i][j] = (double)(1);
            }
            else
            {
                q->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
    ae_vector_set_length(&work, ae_maxint(m, qcolumns, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, qcolumns, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, qcolumns, _state);

    /*
     * Blocked code: walk the panels in REVERSE order (last panel first),
     * starting from the possibly-partial trailing block.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        rowscount = m-blockstart;
        if( blocksize>0 )
        {

            /*
             * Copy current block
             */
            rmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1));

            /*
             * Update, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qcolumns>=2*ts )
            {

                /*
                 * Prepare block reflector
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply matrix by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 */
                rmatrixgemm(blocksize, qcolumns, rowscount, 1.0, &tmpa, 0, 0, 1, q, blockstart, 0, 0, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(blocksize, qcolumns, blocksize, 1.0, &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, 0.0, &tmpr, blocksize, 0, _state);
                rmatrixgemm(rowscount, qcolumns, blocksize, 1.0, &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, 1.0, q, blockstart, 0, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: within a block, reflectors are also
                 * applied in reverse order.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    /* reflector i lives in column i of TmpA, starting at the diagonal */
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], tmpa.stride, ae_v_len(1,rowscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheleft(q, taubuf.ptr.p_double[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state);
                }
            }
        }

        /*
         * Advance (backwards); all earlier blocks are full TS-sized ones
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
19968 
19969 
19970 /*************************************************************************
19971 Unpacking of matrix R from the QR decomposition of a matrix A
19972 
19973 Input parameters:
19974     A       -   matrices Q and R in compact form.
19975                 Output of RMatrixQR subroutine.
19976     M       -   number of rows in given matrix A. M>=0.
19977     N       -   number of columns in given matrix A. N>=0.
19978 
19979 Output parameters:
19980     R       -   matrix R, array[0..M-1, 0..N-1].
19981 
19982   -- ALGLIB routine --
19983      17.02.2010
19984      Bochkanov Sergey
19985 *************************************************************************/
void rmatrixqrunpackr(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* r,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;

    ae_matrix_clear(r);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * R is M*N: entries on or above the diagonal within the leading
     * min(M,N) rows are taken from the compact QR storage in A, every
     * other entry is zero.
     */
    k = ae_minint(m, n, _state);
    ae_matrix_set_length(r, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i<=k-1&&j>=i )
            {
                r->ptr.pp_double[i][j] = a->ptr.pp_double[i][j];
            }
            else
            {
                r->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
}
20016 
20017 
20018 /*************************************************************************
20019 Partial unpacking of matrix Q from the LQ decomposition of a matrix A
20020 
20021 Input parameters:
20022     A       -   matrices L and Q in compact form.
20023                 Output of RMatrixLQ subroutine.
20024     M       -   number of rows in given matrix A. M>=0.
20025     N       -   number of columns in given matrix A. N>=0.
20026     Tau     -   scalar factors which are used to form Q.
20027                 Output of the RMatrixLQ subroutine.
20028     QRows   -   required number of rows in matrix Q. N>=QRows>=0.
20029 
20030 Output parameters:
20031     Q       -   first QRows rows of matrix Q. Array whose indexes range
20032                 within [0..QRows-1, 0..N-1]. If QRows=0, the array remains
20033                 unchanged.
20034 
20035   ! FREE EDITION OF ALGLIB:
20036   !
20037   ! Free Edition of ALGLIB supports following important features for  this
20038   ! function:
20039   ! * C++ version: x64 SIMD support using C++ intrinsics
20040   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
20041   !
20042   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
20043   ! Reference Manual in order  to  find  out  how to activate SIMD support
20044   ! in ALGLIB.
20045 
20046   ! COMMERCIAL EDITION OF ALGLIB:
20047   !
20048   ! Commercial Edition of ALGLIB includes following important improvements
20049   ! of this function:
20050   ! * high-performance native backend with same C# interface (C# version)
20051   ! * multithreading support (C++ and C# versions)
20052   ! * hardware vendor (Intel) implementations of linear algebra primitives
20053   !   (C++ and C# versions, x86/x64 platform)
20054   !
20055   ! We recommend you to read 'Working with commercial version' section  of
20056   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20057   ! related features provided by commercial edition of ALGLIB.
20058 
20059   -- ALGLIB routine --
20060      17.02.2010
20061      Bochkanov Sergey
20062 *************************************************************************/
void rmatrixlqunpackq(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_int_t qrows,
     /* Real    */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_int_t refcnt;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_REAL, _state, ae_true);

    /*
     * Q of an LQ factorization is N*N, so at most N of its rows can be
     * requested. Degenerate sizes (including QRows=0, for which Q must
     * stay unchanged) exit before any allocation is performed.
     */
    ae_assert(qrows<=n, "RMatrixLQUnpackQ: QRows>N!", _state);
    if( (m<=0||n<=0)||qrows<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init
     *
     * RefCnt is the number of elementary reflectors which actually
     * influence the first QRows rows of Q. TmpA holds one ts-row panel of
     * reflector vectors, TmpT the triangular factor T of the block
     * reflector, and TmpR two side-by-side QRows*ts scratch panels used
     * by the three-GEMM update below.
     */
    ts = matrixtilesizeb(_state);
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qrows, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, 2*ts, _state);
    ae_matrix_set_length(&tmpr, qrows, 2*ts, _state);
    ae_matrix_set_length(q, qrows, n, _state);
    /*
     * Start from the identity: reflectors are accumulated into it below.
     */
    for(i=0; i<=qrows-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_double[i][j] = (double)(1);
            }
            else
            {
                q->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }

    /*
     * Blocked code
     *
     * Reflector blocks are applied in reverse order, starting from the
     * last (possibly partial) block and stepping back by ts rows.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        columnscount = n-blockstart;
        if( blocksize>0 )
        {

            /*
             * Copy submatrix: the current panel of row-stored reflector
             * vectors and the matching Tau entries are staged in
             * contiguous buffers.
             */
            rmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_move(&taubuf.ptr.p_double[0], 1, &tau->ptr.p_double[blockstart], 1, ae_v_len(0,blocksize-1));

            /*
             * Update matrix, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qrows>=2*ts )
            {

                /*
                 * Prepare block reflector (column-reflector form, hence
                 * the ae_false flag)
                 */
                ortfac_rmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
                 *
                 * Steps: TmpR[:,0:bs) = Q*Y', then TmpR[:,bs:2bs) = that
                 * times T', then accumulate the rank-bs correction into Q.
                 */
                rmatrixgemm(qrows, blocksize, columnscount, 1.0, q, 0, blockstart, 0, &tmpa, 0, 0, 1, 0.0, &tmpr, 0, 0, _state);
                rmatrixgemm(qrows, blocksize, blocksize, 1.0, &tmpr, 0, 0, 0, &tmpt, 0, 0, 1, 0.0, &tmpr, 0, blocksize, _state);
                rmatrixgemm(qrows, columnscount, blocksize, 1.0, &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, 1.0, q, 0, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply the reflectors of this block
                 * one at a time from the right, last reflector first.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    ae_v_move(&t.ptr.p_double[1], 1, &tmpa.ptr.pp_double[i][i], 1, ae_v_len(1,columnscount-i));
                    t.ptr.p_double[1] = (double)(1);
                    applyreflectionfromtheright(q, taubuf.ptr.p_double[i], &t, 0, qrows-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance to the previous (always full-sized) block.
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
20201 
20202 
20203 /*************************************************************************
20204 Unpacking of matrix L from the LQ decomposition of a matrix A
20205 
20206 Input parameters:
20207     A       -   matrices Q and L in compact form.
20208                 Output of RMatrixLQ subroutine.
20209     M       -   number of rows in given matrix A. M>=0.
20210     N       -   number of columns in given matrix A. N>=0.
20211 
20212 Output parameters:
20213     L       -   matrix L, array[0..M-1, 0..N-1].
20214 
20215   -- ALGLIB routine --
20216      17.02.2010
20217      Bochkanov Sergey
20218 *************************************************************************/
void rmatrixlqunpackl(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_matrix* l,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    ae_matrix_clear(l);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * L is M*N: entries on or below the diagonal are taken from the
     * compact LQ storage in A, every other entry is zero.
     */
    ae_matrix_set_length(l, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( j<=i )
            {
                l->ptr.pp_double[i][j] = a->ptr.pp_double[i][j];
            }
            else
            {
                l->ptr.pp_double[i][j] = (double)(0);
            }
        }
    }
}
20249 
20250 
20251 /*************************************************************************
20252 Partial unpacking of matrix Q from QR decomposition of a complex matrix A.
20253 
20254 Input parameters:
20255     A           -   matrices Q and R in compact form.
20256                     Output of CMatrixQR subroutine .
20257     M           -   number of rows in matrix A. M>=0.
20258     N           -   number of columns in matrix A. N>=0.
20259     Tau         -   scalar factors which are used to form Q.
20260                     Output of CMatrixQR subroutine .
20261     QColumns    -   required number of columns in matrix Q. M>=QColumns>=0.
20262 
20263 Output parameters:
20264     Q           -   first QColumns columns of matrix Q.
20265                     Array whose index ranges within [0..M-1, 0..QColumns-1].
20266                     If QColumns=0, array isn't changed.
20267 
20268   ! FREE EDITION OF ALGLIB:
20269   !
20270   ! Free Edition of ALGLIB supports following important features for  this
20271   ! function:
20272   ! * C++ version: x64 SIMD support using C++ intrinsics
20273   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
20274   !
20275   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
20276   ! Reference Manual in order  to  find  out  how to activate SIMD support
20277   ! in ALGLIB.
20278 
20279   ! COMMERCIAL EDITION OF ALGLIB:
20280   !
20281   ! Commercial Edition of ALGLIB includes following important improvements
20282   ! of this function:
20283   ! * high-performance native backend with same C# interface (C# version)
20284   ! * multithreading support (C++ and C# versions)
20285   ! * hardware vendor (Intel) implementations of linear algebra primitives
20286   !   (C++ and C# versions, x86/x64 platform)
20287   !
20288   ! We recommend you to read 'Working with commercial version' section  of
20289   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20290   ! related features provided by commercial edition of ALGLIB.
20291 
20292   -- ALGLIB routine --
20293      17.02.2010
20294      Bochkanov Sergey
20295 *************************************************************************/
void cmatrixqrunpackq(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_int_t qcolumns,
     /* Complex */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_int_t refcnt;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t rowscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Q of a QR factorization is M*M, so at most M columns can be
     * requested.
     *
     * NOTE(review): unlike the real-valued sibling, there is no early
     * exit for QColumns<=0 here, so QColumns=0 still resizes Q to M*0
     * although the header says the array should stay unchanged --
     * confirm whether this is intentional.
     */
    ae_assert(qcolumns<=m, "UnpackQFromQR: QColumns>M!", _state);
    if( m<=0||n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init
     *
     * The tile size is halved relative to the real case (presumably
     * because complex entries are twice as large -- matches the other
     * complex routines in this file). RefCnt is the number of
     * reflectors affecting the first QColumns columns of Q; TmpA stages
     * one reflector panel, TmpT the block-reflector triangular factor,
     * TmpR two stacked blocksize*QColumns scratch panels.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qcolumns, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, m, ts, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, 2*ts, qcolumns, _state);
    ae_matrix_set_length(q, m, qcolumns, _state);
    /*
     * Start from the identity: reflectors are accumulated into it below.
     */
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=qcolumns-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(1);
            }
            else
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }

    /*
     * Blocked code
     *
     * Reflector blocks are applied in reverse order, starting from the
     * last (possibly partial) block and stepping back by ts columns.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        rowscount = m-blockstart;
        if( blocksize>0 )
        {

            /*
             * QR decomposition of submatrix.
             * Matrix is copied to temporary storage to solve
             * some TLB issues arising from non-contiguous memory
             * access pattern.
             */
            cmatrixcopy(rowscount, blocksize, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1));

            /*
             * Update matrix, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qcolumns>=2*ts )
            {

                /*
                 * Prepare block reflector (column-reflector form, ae_true)
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_true, rowscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q.
                 *
                 * Q  = E + Y*T*Y'  = E + TmpA*TmpT*TmpA'
                 *
                 * OpType 2 on TmpA below is conjugate-transpose (ALGLIB
                 * GEMM convention): rows [0,bs) of TmpR get Y'*Q, rows
                 * [bs,2bs) get T*(Y'*Q), then the correction is added
                 * back into Q.
                 */
                cmatrixgemm(blocksize, qcolumns, rowscount, ae_complex_from_d(1.0), &tmpa, 0, 0, 2, q, blockstart, 0, 0, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(blocksize, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpt, 0, 0, 0, &tmpr, 0, 0, 0, ae_complex_from_d(0.0), &tmpr, blocksize, 0, _state);
                cmatrixgemm(rowscount, qcolumns, blocksize, ae_complex_from_d(1.0), &tmpa, 0, 0, 0, &tmpr, blocksize, 0, 0, ae_complex_from_d(1.0), q, blockstart, 0, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time from
                 * the left, last reflector first. The reflector vector is
                 * gathered from a column of TmpA, hence the stride.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], tmpa.stride, "N", ae_v_len(1,rowscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheleft(q, taubuf.ptr.p_complex[i], &t, blockstart+i, m-1, 0, qcolumns-1, &work, _state);
                }
            }
        }

        /*
         * Advance to the previous (always full-sized) block.
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
20437 
20438 
20439 /*************************************************************************
20440 Unpacking of matrix R from the QR decomposition of a matrix A
20441 
20442 Input parameters:
20443     A       -   matrices Q and R in compact form.
20444                 Output of CMatrixQR subroutine.
20445     M       -   number of rows in given matrix A. M>=0.
20446     N       -   number of columns in given matrix A. N>=0.
20447 
20448 Output parameters:
20449     R       -   matrix R, array[0..M-1, 0..N-1].
20450 
20451   -- ALGLIB routine --
20452      17.02.2010
20453      Bochkanov Sergey
20454 *************************************************************************/
void cmatrixqrunpackr(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* r,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;

    ae_matrix_clear(r);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * R is M*N: entries on or above the diagonal within the leading
     * min(M,N) rows are taken from the compact QR storage in A, every
     * other entry is zero.
     */
    k = ae_minint(m, n, _state);
    ae_matrix_set_length(r, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i<=k-1&&j>=i )
            {
                r->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][j];
            }
            else
            {
                r->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }
}
20485 
20486 
20487 /*************************************************************************
20488 Partial unpacking of matrix Q from LQ decomposition of a complex matrix A.
20489 
20490 Input parameters:
20491     A           -   matrices Q and R in compact form.
20492                     Output of CMatrixLQ subroutine .
20493     M           -   number of rows in matrix A. M>=0.
20494     N           -   number of columns in matrix A. N>=0.
20495     Tau         -   scalar factors which are used to form Q.
20496                     Output of CMatrixLQ subroutine .
    QRows       -   required number of rows in matrix Q. N>=QRows>=0.
20498 
20499 Output parameters:
20500     Q           -   first QRows rows of matrix Q.
20501                     Array whose index ranges within [0..QRows-1, 0..N-1].
20502                     If QRows=0, array isn't changed.
20503 
20504   ! FREE EDITION OF ALGLIB:
20505   !
20506   ! Free Edition of ALGLIB supports following important features for  this
20507   ! function:
20508   ! * C++ version: x64 SIMD support using C++ intrinsics
20509   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
20510   !
20511   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
20512   ! Reference Manual in order  to  find  out  how to activate SIMD support
20513   ! in ALGLIB.
20514 
20515   ! COMMERCIAL EDITION OF ALGLIB:
20516   !
20517   ! Commercial Edition of ALGLIB includes following important improvements
20518   ! of this function:
20519   ! * high-performance native backend with same C# interface (C# version)
20520   ! * multithreading support (C++ and C# versions)
20521   ! * hardware vendor (Intel) implementations of linear algebra primitives
20522   !   (C++ and C# versions, x86/x64 platform)
20523   !
20524   ! We recommend you to read 'Working with commercial version' section  of
20525   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20526   ! related features provided by commercial edition of ALGLIB.
20527 
20528   -- ALGLIB routine --
20529      17.02.2010
20530      Bochkanov Sergey
20531 *************************************************************************/
void cmatrixlqunpackq(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* tau,
     ae_int_t qrows,
     /* Complex */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_vector t;
    ae_vector taubuf;
    ae_int_t minmn;
    ae_int_t refcnt;
    ae_matrix tmpa;
    ae_matrix tmpt;
    ae_matrix tmpr;
    ae_int_t blockstart;
    ae_int_t blocksize;
    ae_int_t columnscount;
    ae_int_t i;
    ae_int_t j;
    ae_int_t ts;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&t, 0, sizeof(t));
    memset(&taubuf, 0, sizeof(taubuf));
    memset(&tmpa, 0, sizeof(tmpa));
    memset(&tmpt, 0, sizeof(tmpt));
    memset(&tmpr, 0, sizeof(tmpr));
    ae_matrix_clear(q);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&taubuf, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpa, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpt, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&tmpr, 0, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Q of an LQ factorization is N*N, so at most N rows can be
     * requested (documented contract: N>=QRows>=0). QRows<=0 is part of
     * the early exit so that Q is not resized when QRows=0, as the
     * header promises. Both checks mirror RMatrixLQUnpackQ; previously
     * this routine performed neither.
     */
    ae_assert(qrows<=n, "CMatrixLQUnpackQ: QRows>N!", _state);
    if( (m<=0||n<=0)||qrows<=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Init
     *
     * The tile size is halved relative to the real case (complex
     * entries are twice as large). RefCnt is the number of reflectors
     * affecting the first QRows rows of Q; TmpA stages one reflector
     * panel, TmpT the block-reflector triangular factor, TmpR two
     * side-by-side QRows*ts scratch panels for the blocked update.
     */
    ts = matrixtilesizeb(_state)/2;
    minmn = ae_minint(m, n, _state);
    refcnt = ae_minint(minmn, qrows, _state);
    ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&t, ae_maxint(m, n, _state)+1, _state);
    ae_vector_set_length(&taubuf, minmn, _state);
    ae_matrix_set_length(&tmpa, ts, n, _state);
    ae_matrix_set_length(&tmpt, ts, ts, _state);
    ae_matrix_set_length(&tmpr, qrows, 2*ts, _state);
    ae_matrix_set_length(q, qrows, n, _state);
    /*
     * Start from the identity: reflectors are accumulated into it below.
     */
    for(i=0; i<=qrows-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(1);
            }
            else
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }

    /*
     * Blocked code
     *
     * Reflector blocks are applied in reverse order, starting from the
     * last (possibly partial) block and stepping back by ts rows.
     */
    blockstart = ts*(refcnt/ts);
    blocksize = refcnt-blockstart;
    while(blockstart>=0)
    {
        columnscount = n-blockstart;
        if( blocksize>0 )
        {

            /*
             * LQ decomposition of submatrix.
             * Matrix is copied to temporary storage to solve
             * some TLB issues arising from non-contiguous memory
             * access pattern.
             */
            cmatrixcopy(blocksize, columnscount, a, blockstart, blockstart, &tmpa, 0, 0, _state);
            ae_v_cmove(&taubuf.ptr.p_complex[0], 1, &tau->ptr.p_complex[blockstart], 1, "N", ae_v_len(0,blocksize-1));

            /*
             * Update matrix, choose between:
             * a) Level 2 algorithm (when the rest of the matrix is small enough)
             * b) blocked algorithm, see algorithm 5 from  'A storage efficient WY
             *    representation for products of Householder transformations',
             *    by R. Schreiber and C. Van Loan.
             */
            if( qrows>=2*ts )
            {

                /*
                 * Prepare block reflector (row-reflector form, ae_false)
                 */
                ortfac_cmatrixblockreflector(&tmpa, &taubuf, ae_false, columnscount, blocksize, &tmpt, &work, _state);

                /*
                 * Multiply the rest of A by Q'.
                 *
                 * Q'  = E + Y*T'*Y'  = E + TmpA'*TmpT'*TmpA
                 *
                 * OpType 2 is conjugate-transpose (ALGLIB GEMM
                 * convention): TmpR[:,0:bs) = Q*Y', TmpR[:,bs:2bs) =
                 * that times T', then the correction is added into Q.
                 */
                cmatrixgemm(qrows, blocksize, columnscount, ae_complex_from_d(1.0), q, 0, blockstart, 0, &tmpa, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, 0, _state);
                cmatrixgemm(qrows, blocksize, blocksize, ae_complex_from_d(1.0), &tmpr, 0, 0, 0, &tmpt, 0, 0, 2, ae_complex_from_d(0.0), &tmpr, 0, blocksize, _state);
                cmatrixgemm(qrows, columnscount, blocksize, ae_complex_from_d(1.0), &tmpr, 0, blocksize, 0, &tmpa, 0, 0, 0, ae_complex_from_d(1.0), q, 0, blockstart, _state);
            }
            else
            {

                /*
                 * Level 2 algorithm: apply reflectors one at a time from
                 * the right, last reflector first. The stored vector is
                 * conjugated and so is Tau, matching the Q' update above.
                 */
                for(i=blocksize-1; i>=0; i--)
                {
                    ae_v_cmove(&t.ptr.p_complex[1], 1, &tmpa.ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,columnscount-i));
                    t.ptr.p_complex[1] = ae_complex_from_i(1);
                    complexapplyreflectionfromtheright(q, ae_c_conj(taubuf.ptr.p_complex[i], _state), &t, 0, qrows-1, blockstart+i, n-1, &work, _state);
                }
            }
        }

        /*
         * Advance to the previous (always full-sized) block.
         */
        blockstart = blockstart-ts;
        blocksize = ts;
    }
    ae_frame_leave(_state);
}
20672 
20673 
20674 /*************************************************************************
20675 Unpacking of matrix L from the LQ decomposition of a matrix A
20676 
20677 Input parameters:
20678     A       -   matrices Q and L in compact form.
20679                 Output of CMatrixLQ subroutine.
20680     M       -   number of rows in given matrix A. M>=0.
20681     N       -   number of columns in given matrix A. N>=0.
20682 
20683 Output parameters:
20684     L       -   matrix L, array[0..M-1, 0..N-1].
20685 
20686   -- ALGLIB routine --
20687      17.02.2010
20688      Bochkanov Sergey
20689 *************************************************************************/
void cmatrixlqunpackl(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_matrix* l,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    ae_matrix_clear(l);

    if( m<=0||n<=0 )
    {
        return;
    }

    /*
     * L is M*N: entries on or below the diagonal are taken from the
     * compact LQ storage in A, every other entry is zero.
     */
    ae_matrix_set_length(l, m, n, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( j<=i )
            {
                l->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][j];
            }
            else
            {
                l->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }
}
20720 
20721 
20722 /*************************************************************************
20723 Base case for real QR
20724 
20725   -- LAPACK routine (version 3.0) --
20726      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20727      Courant Institute, Argonne National Lab, and Rice University
20728      September 30, 1994.
20729      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
20730      pseudocode, 2007-2010.
20731 *************************************************************************/
void rmatrixqrbasecase(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Real    */ ae_vector* work,
     /* Real    */ ae_vector* t,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_int_t idx;
    ae_int_t reflcnt;
    double beta;

    /*
     * One elementary reflector per column of the leading min(M,N) block.
     */
    reflcnt = ae_minint(m, n, _state);
    for(idx=0; idx<reflcnt; idx++)
    {

        /*
         * Generate reflector H(idx) annihilating A(idx+1:m-1,idx):
         * the column is gathered into T[1..m-idx] (note the row stride
         * of A), transformed, and scattered back in place.
         */
        ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[idx][idx], a->stride, ae_v_len(1,m-idx));
        generatereflection(t, m-idx, &beta, _state);
        tau->ptr.p_double[idx] = beta;
        ae_v_move(&a->ptr.pp_double[idx][idx], a->stride, &t->ptr.p_double[1], 1, ae_v_len(idx,m-1));
        t->ptr.p_double[1] = (double)(1);
        if( idx<n )
        {

            /*
             * Apply H(idx) to the trailing submatrix A(idx:m-1,idx+1:n-1)
             * from the left (condition is always true here since
             * idx<=reflcnt-1<=n-1; kept from the LAPACK original).
             */
            applyreflectionfromtheleft(a, tau->ptr.p_double[idx], t, idx, m-1, idx+1, n-1, work, _state);
        }
    }
}
20773 
20774 
20775 /*************************************************************************
20776 Base case for real LQ
20777 
20778   -- LAPACK routine (version 3.0) --
20779      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20780      Courant Institute, Argonne National Lab, and Rice University
20781      September 30, 1994.
20782      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
20783      pseudocode, 2007-2010.
20784 *************************************************************************/
rmatrixlqbasecase(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * work,ae_vector * t,ae_vector * tau,ae_state * _state)20785 void rmatrixlqbasecase(/* Real    */ ae_matrix* a,
20786      ae_int_t m,
20787      ae_int_t n,
20788      /* Real    */ ae_vector* work,
20789      /* Real    */ ae_vector* t,
20790      /* Real    */ ae_vector* tau,
20791      ae_state *_state)
20792 {
20793     ae_int_t i;
20794     ae_int_t k;
20795     double tmp;
20796 
20797 
20798     k = ae_minint(m, n, _state);
20799     for(i=0; i<=k-1; i++)
20800     {
20801 
20802         /*
20803          * Generate elementary reflector H(i) to annihilate A(i,i+1:n-1)
20804          */
20805         ae_v_move(&t->ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
20806         generatereflection(t, n-i, &tmp, _state);
20807         tau->ptr.p_double[i] = tmp;
20808         ae_v_move(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[1], 1, ae_v_len(i,n-1));
20809         t->ptr.p_double[1] = (double)(1);
20810         if( i<n )
20811         {
20812 
20813             /*
20814              * Apply H(i) to A(i+1:m,i:n) from the right
20815              */
20816             applyreflectionfromtheright(a, tau->ptr.p_double[i], t, i+1, m-1, i, n-1, work, _state);
20817         }
20818     }
20819 }
20820 
20821 
20822 /*************************************************************************
20823 Reduction of a rectangular matrix to  bidiagonal form
20824 
20825 The algorithm reduces the rectangular matrix A to  bidiagonal form by
20826 orthogonal transformations P and Q: A = Q*B*(P^T).
20827 
20828   ! COMMERCIAL EDITION OF ALGLIB:
20829   !
20830   ! Commercial Edition of ALGLIB includes following important improvements
20831   ! of this function:
20832   ! * high-performance native backend with same C# interface (C# version)
20833   ! * hardware vendor (Intel) implementations of linear algebra primitives
20834   !   (C++ and C# versions, x86/x64 platform)
20835   !
20836   ! We recommend you to read 'Working with commercial version' section  of
20837   ! ALGLIB Reference Manual in order to find out how to  use  performance-
20838   ! related features provided by commercial edition of ALGLIB.
20839 
20840 Input parameters:
20841     A       -   source matrix. array[0..M-1, 0..N-1]
20842     M       -   number of rows in matrix A.
20843     N       -   number of columns in matrix A.
20844 
20845 Output parameters:
20846     A       -   matrices Q, B, P in compact form (see below).
20847     TauQ    -   scalar factors which are used to form matrix Q.
20848     TauP    -   scalar factors which are used to form matrix P.
20849 
20850 The main diagonal and one of the  secondary  diagonals  of  matrix  A  are
20851 replaced with bidiagonal  matrix  B.  Other  elements  contain  elementary
20852 reflections which form MxM matrix Q and NxN matrix P, respectively.
20853 
20854 If M>=N, B is the upper  bidiagonal  MxN  matrix  and  is  stored  in  the
20855 corresponding  elements  of  matrix  A.  Matrix  Q  is  represented  as  a
20856 product   of   elementary   reflections   Q = H(0)*H(1)*...*H(n-1),  where
20857 H(i) = 1-tau*v*v'. Here tau is a scalar which is stored  in  TauQ[i],  and
20858 vector v has the following  structure:  v(0:i-1)=0, v(i)=1, v(i+1:m-1)  is
20859 stored   in   elements   A(i+1:m-1,i).   Matrix   P  is  as  follows:  P =
20860 G(0)*G(1)*...*G(n-2), where G(i) = 1 - tau*u*u'. Tau is stored in TauP[i],
20861 u(0:i)=0, u(i+1)=1, u(i+2:n-1) is stored in elements A(i,i+2:n-1).
20862 
20863 If M<N, B is the  lower  bidiagonal  MxN  matrix  and  is  stored  in  the
20864 corresponding   elements  of  matrix  A.  Q = H(0)*H(1)*...*H(m-2),  where
20865 H(i) = 1 - tau*v*v', tau is stored in TauQ, v(0:i)=0, v(i+1)=1, v(i+2:m-1)
20866 is    stored    in   elements   A(i+2:m-1,i).    P = G(0)*G(1)*...*G(m-1),
20867 G(i) = 1-tau*u*u', tau is stored in  TauP,  u(0:i-1)=0, u(i)=1, u(i+1:n-1)
20868 is stored in A(i,i+1:n-1).
20869 
20870 EXAMPLE:
20871 
20872 m=6, n=5 (m > n):               m=5, n=6 (m < n):
20873 
20874 (  d   e   u1  u1  u1 )         (  d   u1  u1  u1  u1  u1 )
20875 (  v1  d   e   u2  u2 )         (  e   d   u2  u2  u2  u2 )
20876 (  v1  v2  d   e   u3 )         (  v1  e   d   u3  u3  u3 )
20877 (  v1  v2  v3  d   e  )         (  v1  v2  e   d   u4  u4 )
20878 (  v1  v2  v3  v4  d  )         (  v1  v2  v3  e   d   u5 )
20879 (  v1  v2  v3  v4  v5 )
20880 
20881 Here vi and ui are vectors which form H(i) and G(i), and d and e -
20882 are the diagonal and off-diagonal elements of matrix B.
20883 
20884   -- LAPACK routine (version 3.0) --
20885      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
20886      Courant Institute, Argonne National Lab, and Rice University
20887      September 30, 1994.
20888      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
20889      pseudocode, 2007-2010.
20890 *************************************************************************/
rmatrixbd(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_vector * taup,ae_state * _state)20891 void rmatrixbd(/* Real    */ ae_matrix* a,
20892      ae_int_t m,
20893      ae_int_t n,
20894      /* Real    */ ae_vector* tauq,
20895      /* Real    */ ae_vector* taup,
20896      ae_state *_state)
20897 {
20898     ae_frame _frame_block;
20899     ae_vector work;
20900     ae_vector t;
20901     ae_int_t maxmn;
20902     ae_int_t i;
20903     double ltau;
20904 
20905     ae_frame_make(_state, &_frame_block);
20906     memset(&work, 0, sizeof(work));
20907     memset(&t, 0, sizeof(t));
20908     ae_vector_clear(tauq);
20909     ae_vector_clear(taup);
20910     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
20911     ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
20912 
20913 
20914     /*
20915      * Prepare
20916      */
20917     if( n<=0||m<=0 )
20918     {
20919         ae_frame_leave(_state);
20920         return;
20921     }
20922     maxmn = ae_maxint(m, n, _state);
20923     ae_vector_set_length(&work, maxmn+1, _state);
20924     ae_vector_set_length(&t, maxmn+1, _state);
20925     if( m>=n )
20926     {
20927         ae_vector_set_length(tauq, n, _state);
20928         ae_vector_set_length(taup, n, _state);
20929         for(i=0; i<=n-1; i++)
20930         {
20931             tauq->ptr.p_double[i] = 0.0;
20932             taup->ptr.p_double[i] = 0.0;
20933         }
20934     }
20935     else
20936     {
20937         ae_vector_set_length(tauq, m, _state);
20938         ae_vector_set_length(taup, m, _state);
20939         for(i=0; i<=m-1; i++)
20940         {
20941             tauq->ptr.p_double[i] = 0.0;
20942             taup->ptr.p_double[i] = 0.0;
20943         }
20944     }
20945 
20946     /*
20947      * Try to use MKL code
20948      *
20949      * NOTE: buffers Work[] and T[] are used for temporary storage of diagonals;
20950      * because they are present in A[], we do not use them.
20951      */
20952     if( rmatrixbdmkl(a, m, n, &work, &t, tauq, taup, _state) )
20953     {
20954         ae_frame_leave(_state);
20955         return;
20956     }
20957 
20958     /*
20959      * ALGLIB code
20960      */
20961     if( m>=n )
20962     {
20963 
20964         /*
20965          * Reduce to upper bidiagonal form
20966          */
20967         for(i=0; i<=n-1; i++)
20968         {
20969 
20970             /*
20971              * Generate elementary reflector H(i) to annihilate A(i+1:m-1,i)
20972              */
20973             ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], a->stride, ae_v_len(1,m-i));
20974             generatereflection(&t, m-i, &ltau, _state);
20975             tauq->ptr.p_double[i] = ltau;
20976             ae_v_move(&a->ptr.pp_double[i][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i,m-1));
20977             t.ptr.p_double[1] = (double)(1);
20978 
20979             /*
20980              * Apply H(i) to A(i:m-1,i+1:n-1) from the left
20981              */
20982             applyreflectionfromtheleft(a, ltau, &t, i, m-1, i+1, n-1, &work, _state);
20983             if( i<n-1 )
20984             {
20985 
20986                 /*
20987                  * Generate elementary reflector G(i) to annihilate
20988                  * A(i,i+2:n-1)
20989                  */
20990                 ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-i-1));
20991                 generatereflection(&t, n-1-i, &ltau, _state);
20992                 taup->ptr.p_double[i] = ltau;
20993                 ae_v_move(&a->ptr.pp_double[i][i+1], 1, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
20994                 t.ptr.p_double[1] = (double)(1);
20995 
20996                 /*
20997                  * Apply G(i) to A(i+1:m-1,i+1:n-1) from the right
20998                  */
20999                 applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state);
21000             }
21001             else
21002             {
21003                 taup->ptr.p_double[i] = (double)(0);
21004             }
21005         }
21006     }
21007     else
21008     {
21009 
21010         /*
21011          * Reduce to lower bidiagonal form
21012          */
21013         for(i=0; i<=m-1; i++)
21014         {
21015 
21016             /*
21017              * Generate elementary reflector G(i) to annihilate A(i,i+1:n-1)
21018              */
21019             ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
21020             generatereflection(&t, n-i, &ltau, _state);
21021             taup->ptr.p_double[i] = ltau;
21022             ae_v_move(&a->ptr.pp_double[i][i], 1, &t.ptr.p_double[1], 1, ae_v_len(i,n-1));
21023             t.ptr.p_double[1] = (double)(1);
21024 
21025             /*
21026              * Apply G(i) to A(i+1:m-1,i:n-1) from the right
21027              */
21028             applyreflectionfromtheright(a, ltau, &t, i+1, m-1, i, n-1, &work, _state);
21029             if( i<m-1 )
21030             {
21031 
21032                 /*
21033                  * Generate elementary reflector H(i) to annihilate
21034                  * A(i+2:m-1,i)
21035                  */
21036                 ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,m-1-i));
21037                 generatereflection(&t, m-1-i, &ltau, _state);
21038                 tauq->ptr.p_double[i] = ltau;
21039                 ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,m-1));
21040                 t.ptr.p_double[1] = (double)(1);
21041 
21042                 /*
21043                  * Apply H(i) to A(i+1:m-1,i+1:n-1) from the left
21044                  */
21045                 applyreflectionfromtheleft(a, ltau, &t, i+1, m-1, i+1, n-1, &work, _state);
21046             }
21047             else
21048             {
21049                 tauq->ptr.p_double[i] = (double)(0);
21050             }
21051         }
21052     }
21053     ae_frame_leave(_state);
21054 }
21055 
21056 
21057 /*************************************************************************
21058 Unpacking matrix Q which reduces a matrix to bidiagonal form.
21059 
21060   ! COMMERCIAL EDITION OF ALGLIB:
21061   !
21062   ! Commercial Edition of ALGLIB includes following important improvements
21063   ! of this function:
21064   ! * high-performance native backend with same C# interface (C# version)
21065   ! * hardware vendor (Intel) implementations of linear algebra primitives
21066   !   (C++ and C# versions, x86/x64 platform)
21067   !
21068   ! We recommend you to read 'Working with commercial version' section  of
21069   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21070   ! related features provided by commercial edition of ALGLIB.
21071 
21072 Input parameters:
21073     QP          -   matrices Q and P in compact form.
21074                     Output of ToBidiagonal subroutine.
21075     M           -   number of rows in matrix A.
21076     N           -   number of columns in matrix A.
21077     TAUQ        -   scalar factors which are used to form Q.
21078                     Output of ToBidiagonal subroutine.
21079     QColumns    -   required number of columns in matrix Q.
21080                     M>=QColumns>=0.
21081 
21082 Output parameters:
21083     Q           -   first QColumns columns of matrix Q.
21084                     Array[0..M-1, 0..QColumns-1]
21085                     If QColumns=0, the array is not modified.
21086 
21087   -- ALGLIB --
21088      2005-2010
21089      Bochkanov Sergey
21090 *************************************************************************/
rmatrixbdunpackq(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_int_t qcolumns,ae_matrix * q,ae_state * _state)21091 void rmatrixbdunpackq(/* Real    */ ae_matrix* qp,
21092      ae_int_t m,
21093      ae_int_t n,
21094      /* Real    */ ae_vector* tauq,
21095      ae_int_t qcolumns,
21096      /* Real    */ ae_matrix* q,
21097      ae_state *_state)
21098 {
21099     ae_int_t i;
21100     ae_int_t j;
21101 
21102     ae_matrix_clear(q);
21103 
21104     ae_assert(qcolumns<=m, "RMatrixBDUnpackQ: QColumns>M!", _state);
21105     ae_assert(qcolumns>=0, "RMatrixBDUnpackQ: QColumns<0!", _state);
21106     if( (m==0||n==0)||qcolumns==0 )
21107     {
21108         return;
21109     }
21110 
21111     /*
21112      * prepare Q
21113      */
21114     ae_matrix_set_length(q, m, qcolumns, _state);
21115     for(i=0; i<=m-1; i++)
21116     {
21117         for(j=0; j<=qcolumns-1; j++)
21118         {
21119             if( i==j )
21120             {
21121                 q->ptr.pp_double[i][j] = (double)(1);
21122             }
21123             else
21124             {
21125                 q->ptr.pp_double[i][j] = (double)(0);
21126             }
21127         }
21128     }
21129 
21130     /*
21131      * Calculate
21132      */
21133     rmatrixbdmultiplybyq(qp, m, n, tauq, q, m, qcolumns, ae_false, ae_false, _state);
21134 }
21135 
21136 
21137 /*************************************************************************
21138 Multiplication by matrix Q which reduces matrix A to  bidiagonal form.
21139 
21140 The algorithm allows pre- or post-multiply by Q or Q'.
21141 
21142   ! COMMERCIAL EDITION OF ALGLIB:
21143   !
21144   ! Commercial Edition of ALGLIB includes following important improvements
21145   ! of this function:
21146   ! * high-performance native backend with same C# interface (C# version)
21147   ! * hardware vendor (Intel) implementations of linear algebra primitives
21148   !   (C++ and C# versions, x86/x64 platform)
21149   !
21150   ! We recommend you to read 'Working with commercial version' section  of
21151   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21152   ! related features provided by commercial edition of ALGLIB.
21153 
21154 Input parameters:
21155     QP          -   matrices Q and P in compact form.
21156                     Output of ToBidiagonal subroutine.
21157     M           -   number of rows in matrix A.
21158     N           -   number of columns in matrix A.
21159     TAUQ        -   scalar factors which are used to form Q.
21160                     Output of ToBidiagonal subroutine.
21161     Z           -   multiplied matrix.
21162                     array[0..ZRows-1,0..ZColumns-1]
21163     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
21164                     ZRows=M, otherwise ZRows can be arbitrary.
21165     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
21166                     ZColumns=M, otherwise ZColumns can be arbitrary.
21167     FromTheRight -  pre- or post-multiply.
21168     DoTranspose -   multiply by Q or Q'.
21169 
21170 Output parameters:
21171     Z           -   product of Z and Q.
21172                     Array[0..ZRows-1,0..ZColumns-1]
21173                     If ZRows=0 or ZColumns=0, the array is not modified.
21174 
21175   -- ALGLIB --
21176      2005-2010
21177      Bochkanov Sergey
21178 *************************************************************************/
rmatrixbdmultiplybyq(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_matrix * z,ae_int_t zrows,ae_int_t zcolumns,ae_bool fromtheright,ae_bool dotranspose,ae_state * _state)21179 void rmatrixbdmultiplybyq(/* Real    */ ae_matrix* qp,
21180      ae_int_t m,
21181      ae_int_t n,
21182      /* Real    */ ae_vector* tauq,
21183      /* Real    */ ae_matrix* z,
21184      ae_int_t zrows,
21185      ae_int_t zcolumns,
21186      ae_bool fromtheright,
21187      ae_bool dotranspose,
21188      ae_state *_state)
21189 {
21190     ae_frame _frame_block;
21191     ae_int_t i;
21192     ae_int_t i1;
21193     ae_int_t i2;
21194     ae_int_t istep;
21195     ae_vector v;
21196     ae_vector work;
21197     ae_vector dummy;
21198     ae_int_t mx;
21199 
21200     ae_frame_make(_state, &_frame_block);
21201     memset(&v, 0, sizeof(v));
21202     memset(&work, 0, sizeof(work));
21203     memset(&dummy, 0, sizeof(dummy));
21204     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21205     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21206     ae_vector_init(&dummy, 0, DT_REAL, _state, ae_true);
21207 
21208     if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 )
21209     {
21210         ae_frame_leave(_state);
21211         return;
21212     }
21213     ae_assert((fromtheright&&zcolumns==m)||(!fromtheright&&zrows==m), "RMatrixBDMultiplyByQ: incorrect Z size!", _state);
21214 
21215     /*
21216      * Try to use MKL code
21217      */
21218     if( rmatrixbdmultiplybymkl(qp, m, n, tauq, &dummy, z, zrows, zcolumns, ae_true, fromtheright, dotranspose, _state) )
21219     {
21220         ae_frame_leave(_state);
21221         return;
21222     }
21223 
21224     /*
21225      * init
21226      */
21227     mx = ae_maxint(m, n, _state);
21228     mx = ae_maxint(mx, zrows, _state);
21229     mx = ae_maxint(mx, zcolumns, _state);
21230     ae_vector_set_length(&v, mx+1, _state);
21231     ae_vector_set_length(&work, mx+1, _state);
21232     if( m>=n )
21233     {
21234 
21235         /*
21236          * setup
21237          */
21238         if( fromtheright )
21239         {
21240             i1 = 0;
21241             i2 = n-1;
21242             istep = 1;
21243         }
21244         else
21245         {
21246             i1 = n-1;
21247             i2 = 0;
21248             istep = -1;
21249         }
21250         if( dotranspose )
21251         {
21252             i = i1;
21253             i1 = i2;
21254             i2 = i;
21255             istep = -istep;
21256         }
21257 
21258         /*
21259          * Process
21260          */
21261         i = i1;
21262         do
21263         {
21264             ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], qp->stride, ae_v_len(1,m-i));
21265             v.ptr.p_double[1] = (double)(1);
21266             if( fromtheright )
21267             {
21268                 applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i, m-1, &work, _state);
21269             }
21270             else
21271             {
21272                 applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i, m-1, 0, zcolumns-1, &work, _state);
21273             }
21274             i = i+istep;
21275         }
21276         while(i!=i2+istep);
21277     }
21278     else
21279     {
21280 
21281         /*
21282          * setup
21283          */
21284         if( fromtheright )
21285         {
21286             i1 = 0;
21287             i2 = m-2;
21288             istep = 1;
21289         }
21290         else
21291         {
21292             i1 = m-2;
21293             i2 = 0;
21294             istep = -1;
21295         }
21296         if( dotranspose )
21297         {
21298             i = i1;
21299             i1 = i2;
21300             i2 = i;
21301             istep = -istep;
21302         }
21303 
21304         /*
21305          * Process
21306          */
21307         if( m-1>0 )
21308         {
21309             i = i1;
21310             do
21311             {
21312                 ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i+1][i], qp->stride, ae_v_len(1,m-i-1));
21313                 v.ptr.p_double[1] = (double)(1);
21314                 if( fromtheright )
21315                 {
21316                     applyreflectionfromtheright(z, tauq->ptr.p_double[i], &v, 0, zrows-1, i+1, m-1, &work, _state);
21317                 }
21318                 else
21319                 {
21320                     applyreflectionfromtheleft(z, tauq->ptr.p_double[i], &v, i+1, m-1, 0, zcolumns-1, &work, _state);
21321                 }
21322                 i = i+istep;
21323             }
21324             while(i!=i2+istep);
21325         }
21326     }
21327     ae_frame_leave(_state);
21328 }
21329 
21330 
21331 /*************************************************************************
21332 Unpacking matrix P which reduces matrix A to bidiagonal form.
21333 The subroutine returns transposed matrix P.
21334 
21335 Input parameters:
21336     QP      -   matrices Q and P in compact form.
21337                 Output of ToBidiagonal subroutine.
21338     M       -   number of rows in matrix A.
21339     N       -   number of columns in matrix A.
21340     TAUP    -   scalar factors which are used to form P.
21341                 Output of ToBidiagonal subroutine.
21342     PTRows  -   required number of rows of matrix P^T. N >= PTRows >= 0.
21343 
21344 Output parameters:
21345     PT      -   first PTRows columns of matrix P^T
21346                 Array[0..PTRows-1, 0..N-1]
21347                 If PTRows=0, the array is not modified.
21348 
21349   -- ALGLIB --
21350      2005-2010
21351      Bochkanov Sergey
21352 *************************************************************************/
rmatrixbdunpackpt(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * taup,ae_int_t ptrows,ae_matrix * pt,ae_state * _state)21353 void rmatrixbdunpackpt(/* Real    */ ae_matrix* qp,
21354      ae_int_t m,
21355      ae_int_t n,
21356      /* Real    */ ae_vector* taup,
21357      ae_int_t ptrows,
21358      /* Real    */ ae_matrix* pt,
21359      ae_state *_state)
21360 {
21361     ae_int_t i;
21362     ae_int_t j;
21363 
21364     ae_matrix_clear(pt);
21365 
21366     ae_assert(ptrows<=n, "RMatrixBDUnpackPT: PTRows>N!", _state);
21367     ae_assert(ptrows>=0, "RMatrixBDUnpackPT: PTRows<0!", _state);
21368     if( (m==0||n==0)||ptrows==0 )
21369     {
21370         return;
21371     }
21372 
21373     /*
21374      * prepare PT
21375      */
21376     ae_matrix_set_length(pt, ptrows, n, _state);
21377     for(i=0; i<=ptrows-1; i++)
21378     {
21379         for(j=0; j<=n-1; j++)
21380         {
21381             if( i==j )
21382             {
21383                 pt->ptr.pp_double[i][j] = (double)(1);
21384             }
21385             else
21386             {
21387                 pt->ptr.pp_double[i][j] = (double)(0);
21388             }
21389         }
21390     }
21391 
21392     /*
21393      * Calculate
21394      */
21395     rmatrixbdmultiplybyp(qp, m, n, taup, pt, ptrows, n, ae_true, ae_true, _state);
21396 }
21397 
21398 
21399 /*************************************************************************
21400 Multiplication by matrix P which reduces matrix A to  bidiagonal form.
21401 
21402 The algorithm allows pre- or post-multiply by P or P'.
21403 
21404 Input parameters:
21405     QP          -   matrices Q and P in compact form.
21406                     Output of RMatrixBD subroutine.
21407     M           -   number of rows in matrix A.
21408     N           -   number of columns in matrix A.
21409     TAUP        -   scalar factors which are used to form P.
21410                     Output of RMatrixBD subroutine.
21411     Z           -   multiplied matrix.
21412                     Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
21413     ZRows       -   number of rows in matrix Z. If FromTheRight=False,
21414                     ZRows=N, otherwise ZRows can be arbitrary.
21415     ZColumns    -   number of columns in matrix Z. If FromTheRight=True,
21416                     ZColumns=N, otherwise ZColumns can be arbitrary.
21417     FromTheRight -  pre- or post-multiply.
21418     DoTranspose -   multiply by P or P'.
21419 
21420 Output parameters:
21421     Z - product of Z and P.
21422                 Array whose indexes range within [0..ZRows-1,0..ZColumns-1].
21423                 If ZRows=0 or ZColumns=0, the array is not modified.
21424 
21425   -- ALGLIB --
21426      2005-2010
21427      Bochkanov Sergey
21428 *************************************************************************/
rmatrixbdmultiplybyp(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * taup,ae_matrix * z,ae_int_t zrows,ae_int_t zcolumns,ae_bool fromtheright,ae_bool dotranspose,ae_state * _state)21429 void rmatrixbdmultiplybyp(/* Real    */ ae_matrix* qp,
21430      ae_int_t m,
21431      ae_int_t n,
21432      /* Real    */ ae_vector* taup,
21433      /* Real    */ ae_matrix* z,
21434      ae_int_t zrows,
21435      ae_int_t zcolumns,
21436      ae_bool fromtheright,
21437      ae_bool dotranspose,
21438      ae_state *_state)
21439 {
21440     ae_frame _frame_block;
21441     ae_int_t i;
21442     ae_vector v;
21443     ae_vector work;
21444     ae_vector dummy;
21445     ae_int_t mx;
21446     ae_int_t i1;
21447     ae_int_t i2;
21448     ae_int_t istep;
21449 
21450     ae_frame_make(_state, &_frame_block);
21451     memset(&v, 0, sizeof(v));
21452     memset(&work, 0, sizeof(work));
21453     memset(&dummy, 0, sizeof(dummy));
21454     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21455     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21456     ae_vector_init(&dummy, 0, DT_REAL, _state, ae_true);
21457 
21458     if( ((m<=0||n<=0)||zrows<=0)||zcolumns<=0 )
21459     {
21460         ae_frame_leave(_state);
21461         return;
21462     }
21463     ae_assert((fromtheright&&zcolumns==n)||(!fromtheright&&zrows==n), "RMatrixBDMultiplyByP: incorrect Z size!", _state);
21464 
21465     /*
21466      * init
21467      */
21468     mx = ae_maxint(m, n, _state);
21469     mx = ae_maxint(mx, zrows, _state);
21470     mx = ae_maxint(mx, zcolumns, _state);
21471     ae_vector_set_length(&v, mx+1, _state);
21472     ae_vector_set_length(&work, mx+1, _state);
21473     if( m>=n )
21474     {
21475 
21476         /*
21477          * setup
21478          */
21479         if( fromtheright )
21480         {
21481             i1 = n-2;
21482             i2 = 0;
21483             istep = -1;
21484         }
21485         else
21486         {
21487             i1 = 0;
21488             i2 = n-2;
21489             istep = 1;
21490         }
21491         if( !dotranspose )
21492         {
21493             i = i1;
21494             i1 = i2;
21495             i2 = i;
21496             istep = -istep;
21497         }
21498 
21499         /*
21500          * Process
21501          */
21502         if( n-1>0 )
21503         {
21504             i = i1;
21505             do
21506             {
21507                 ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i+1], 1, ae_v_len(1,n-1-i));
21508                 v.ptr.p_double[1] = (double)(1);
21509                 if( fromtheright )
21510                 {
21511                     applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i+1, n-1, &work, _state);
21512                 }
21513                 else
21514                 {
21515                     applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i+1, n-1, 0, zcolumns-1, &work, _state);
21516                 }
21517                 i = i+istep;
21518             }
21519             while(i!=i2+istep);
21520         }
21521     }
21522     else
21523     {
21524 
21525         /*
21526          * setup
21527          */
21528         if( fromtheright )
21529         {
21530             i1 = m-1;
21531             i2 = 0;
21532             istep = -1;
21533         }
21534         else
21535         {
21536             i1 = 0;
21537             i2 = m-1;
21538             istep = 1;
21539         }
21540         if( !dotranspose )
21541         {
21542             i = i1;
21543             i1 = i2;
21544             i2 = i;
21545             istep = -istep;
21546         }
21547 
21548         /*
21549          * Process
21550          */
21551         i = i1;
21552         do
21553         {
21554             ae_v_move(&v.ptr.p_double[1], 1, &qp->ptr.pp_double[i][i], 1, ae_v_len(1,n-i));
21555             v.ptr.p_double[1] = (double)(1);
21556             if( fromtheright )
21557             {
21558                 applyreflectionfromtheright(z, taup->ptr.p_double[i], &v, 0, zrows-1, i, n-1, &work, _state);
21559             }
21560             else
21561             {
21562                 applyreflectionfromtheleft(z, taup->ptr.p_double[i], &v, i, n-1, 0, zcolumns-1, &work, _state);
21563             }
21564             i = i+istep;
21565         }
21566         while(i!=i2+istep);
21567     }
21568     ae_frame_leave(_state);
21569 }
21570 
21571 
21572 /*************************************************************************
21573 Unpacking of the main and secondary diagonals of bidiagonal decomposition
21574 of matrix A.
21575 
21576 Input parameters:
21577     B   -   output of RMatrixBD subroutine.
21578     M   -   number of rows in matrix B.
21579     N   -   number of columns in matrix B.
21580 
21581 Output parameters:
21582     IsUpper -   True, if the matrix is upper bidiagonal.
21583                 otherwise IsUpper is False.
21584     D       -   the main diagonal.
21585                 Array whose index ranges within [0..Min(M,N)-1].
21586     E       -   the secondary diagonal (upper or lower, depending on
21587                 the value of IsUpper).
21588                 Array index ranges within [0..Min(M,N)-1], the last
21589                 element is not used.
21590 
21591   -- ALGLIB --
21592      2005-2010
21593      Bochkanov Sergey
21594 *************************************************************************/
rmatrixbdunpackdiagonals(ae_matrix * b,ae_int_t m,ae_int_t n,ae_bool * isupper,ae_vector * d,ae_vector * e,ae_state * _state)21595 void rmatrixbdunpackdiagonals(/* Real    */ ae_matrix* b,
21596      ae_int_t m,
21597      ae_int_t n,
21598      ae_bool* isupper,
21599      /* Real    */ ae_vector* d,
21600      /* Real    */ ae_vector* e,
21601      ae_state *_state)
21602 {
21603     ae_int_t i;
21604 
21605     *isupper = ae_false;
21606     ae_vector_clear(d);
21607     ae_vector_clear(e);
21608 
21609     *isupper = m>=n;
21610     if( m<=0||n<=0 )
21611     {
21612         return;
21613     }
21614     if( *isupper )
21615     {
21616         ae_vector_set_length(d, n, _state);
21617         ae_vector_set_length(e, n, _state);
21618         for(i=0; i<=n-2; i++)
21619         {
21620             d->ptr.p_double[i] = b->ptr.pp_double[i][i];
21621             e->ptr.p_double[i] = b->ptr.pp_double[i][i+1];
21622         }
21623         d->ptr.p_double[n-1] = b->ptr.pp_double[n-1][n-1];
21624     }
21625     else
21626     {
21627         ae_vector_set_length(d, m, _state);
21628         ae_vector_set_length(e, m, _state);
21629         for(i=0; i<=m-2; i++)
21630         {
21631             d->ptr.p_double[i] = b->ptr.pp_double[i][i];
21632             e->ptr.p_double[i] = b->ptr.pp_double[i+1][i];
21633         }
21634         d->ptr.p_double[m-1] = b->ptr.pp_double[m-1][m-1];
21635     }
21636 }
21637 
21638 
21639 /*************************************************************************
21640 Reduction of a square matrix to  upper Hessenberg form: Q'*A*Q = H,
21641 where Q is an orthogonal matrix, H - Hessenberg matrix.
21642 
21643   ! COMMERCIAL EDITION OF ALGLIB:
21644   !
21645   ! Commercial Edition of ALGLIB includes following important improvements
21646   ! of this function:
21647   ! * high-performance native backend with same C# interface (C# version)
21648   ! * hardware vendor (Intel) implementations of linear algebra primitives
21649   !   (C++ and C# versions, x86/x64 platform)
21650   !
21651   ! We recommend you to read 'Working with commercial version' section  of
21652   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21653   ! related features provided by commercial edition of ALGLIB.
21654 
21655 Input parameters:
21656     A       -   matrix A with elements [0..N-1, 0..N-1]
21657     N       -   size of matrix A.
21658 
21659 Output parameters:
21660     A       -   matrices Q and P in  compact form (see below).
21661     Tau     -   array of scalar factors which are used to form matrix Q.
21662                 Array whose index ranges within [0..N-2]
21663 
21664 Matrix H is located on the main diagonal, on the lower secondary  diagonal
21665 and above the main diagonal of matrix A. The elements which are used to
21666 form matrix Q are situated in array Tau and below the lower secondary
21667 diagonal of matrix A as follows:
21668 
21669 Matrix Q is represented as a product of elementary reflections
21670 
21671 Q = H(0)*H(1)*...*H(n-2),
21672 
21673 where each H(i) is given by
21674 
21675 H(i) = 1 - tau * v * (v^T)
21676 
21677 where tau is a scalar stored in Tau[I]; v - is a real vector,
21678 so that v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) stored in A(i+2:n-1,i).
21679 
21680   -- LAPACK routine (version 3.0) --
21681      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21682      Courant Institute, Argonne National Lab, and Rice University
21683      October 31, 1992
21684 *************************************************************************/
void rmatrixhessenberg(/* Real    */ ae_matrix* a,
     ae_int_t n,
     /* Real    */ ae_vector* tau,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    double v;                   /* scalar factor Tau[i] of the current reflector */
    ae_vector t;                /* 1-based buffer holding the reflector vector */
    ae_vector work;             /* scratch used by ApplyReflectionFrom* routines */

    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    memset(&work, 0, sizeof(work));
    ae_vector_clear(tau);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=0, "RMatrixHessenberg: incorrect N!", _state);

    /*
     * Quick return if possible: 0x0 and 1x1 matrices are trivially Hessenberg,
     * and Tau has no elements in those cases.
     */
    if( n<=1 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Allocate place
     */
    ae_vector_set_length(tau, n-2+1, _state);
    ae_vector_set_length(&t, n+1, _state);
    ae_vector_set_length(&work, n-1+1, _state);

    /*
     * MKL version: if the optimized backend handles this size, we are done
     */
    if( rmatrixhessenbergmkl(a, n, tau, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version
     */
    for(i=0; i<=n-2; i++)
    {

        /*
         * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i).
         * Column i below the diagonal is copied into the 1-based buffer T,
         * the reflector is generated, and the (modified) vector is written
         * back so V is stored in A(i+2:n-1,i).
         */
        ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
        generatereflection(&t, n-i-1, &v, _state);
        ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
        tau->ptr.p_double[i] = v;
        t.ptr.p_double[1] = (double)(1);

        /*
         * Apply H(i) to A(1:ihi,i+1:ihi) from the right
         */
        applyreflectionfromtheright(a, v, &t, 0, n-1, i+1, n-1, &work, _state);

        /*
         * Apply H(i) to A(i+1:ihi,i+1:n) from the left
         */
        applyreflectionfromtheleft(a, v, &t, i+1, n-1, i+1, n-1, &work, _state);
    }
    ae_frame_leave(_state);
}
21757 
21758 
21759 /*************************************************************************
21760 Unpacking matrix Q which reduces matrix A to upper Hessenberg form
21761 
21762   ! COMMERCIAL EDITION OF ALGLIB:
21763   !
21764   ! Commercial Edition of ALGLIB includes following important improvements
21765   ! of this function:
21766   ! * high-performance native backend with same C# interface (C# version)
21767   ! * hardware vendor (Intel) implementations of linear algebra primitives
21768   !   (C++ and C# versions, x86/x64 platform)
21769   !
21770   ! We recommend you to read 'Working with commercial version' section  of
21771   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21772   ! related features provided by commercial edition of ALGLIB.
21773 
21774 Input parameters:
21775     A   -   output of RMatrixHessenberg subroutine.
21776     N   -   size of matrix A.
21777     Tau -   scalar factors which are used to form Q.
21778             Output of RMatrixHessenberg subroutine.
21779 
21780 Output parameters:
21781     Q   -   matrix Q.
21782             Array whose indexes range within [0..N-1, 0..N-1].
21783 
21784   -- ALGLIB --
21785      2005-2010
21786      Bochkanov Sergey
21787 *************************************************************************/
rmatrixhessenbergunpackq(ae_matrix * a,ae_int_t n,ae_vector * tau,ae_matrix * q,ae_state * _state)21788 void rmatrixhessenbergunpackq(/* Real    */ ae_matrix* a,
21789      ae_int_t n,
21790      /* Real    */ ae_vector* tau,
21791      /* Real    */ ae_matrix* q,
21792      ae_state *_state)
21793 {
21794     ae_frame _frame_block;
21795     ae_int_t i;
21796     ae_int_t j;
21797     ae_vector v;
21798     ae_vector work;
21799 
21800     ae_frame_make(_state, &_frame_block);
21801     memset(&v, 0, sizeof(v));
21802     memset(&work, 0, sizeof(work));
21803     ae_matrix_clear(q);
21804     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21805     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21806 
21807     if( n==0 )
21808     {
21809         ae_frame_leave(_state);
21810         return;
21811     }
21812 
21813     /*
21814      * init
21815      */
21816     ae_matrix_set_length(q, n-1+1, n-1+1, _state);
21817     ae_vector_set_length(&v, n-1+1, _state);
21818     ae_vector_set_length(&work, n-1+1, _state);
21819     for(i=0; i<=n-1; i++)
21820     {
21821         for(j=0; j<=n-1; j++)
21822         {
21823             if( i==j )
21824             {
21825                 q->ptr.pp_double[i][j] = (double)(1);
21826             }
21827             else
21828             {
21829                 q->ptr.pp_double[i][j] = (double)(0);
21830             }
21831         }
21832     }
21833 
21834     /*
21835      * MKL version
21836      */
21837     if( rmatrixhessenbergunpackqmkl(a, n, tau, q, _state) )
21838     {
21839         ae_frame_leave(_state);
21840         return;
21841     }
21842 
21843     /*
21844      * ALGLIB version: unpack Q
21845      */
21846     for(i=0; i<=n-2; i++)
21847     {
21848 
21849         /*
21850          * Apply H(i)
21851          */
21852         ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
21853         v.ptr.p_double[1] = (double)(1);
21854         applyreflectionfromtheright(q, tau->ptr.p_double[i], &v, 0, n-1, i+1, n-1, &work, _state);
21855     }
21856     ae_frame_leave(_state);
21857 }
21858 
21859 
21860 /*************************************************************************
21861 Unpacking matrix H (the result of matrix A reduction to upper Hessenberg form)
21862 
21863 Input parameters:
21864     A   -   output of RMatrixHessenberg subroutine.
21865     N   -   size of matrix A.
21866 
21867 Output parameters:
21868     H   -   matrix H. Array whose indexes range within [0..N-1, 0..N-1].
21869 
21870   -- ALGLIB --
21871      2005-2010
21872      Bochkanov Sergey
21873 *************************************************************************/
rmatrixhessenbergunpackh(ae_matrix * a,ae_int_t n,ae_matrix * h,ae_state * _state)21874 void rmatrixhessenbergunpackh(/* Real    */ ae_matrix* a,
21875      ae_int_t n,
21876      /* Real    */ ae_matrix* h,
21877      ae_state *_state)
21878 {
21879     ae_frame _frame_block;
21880     ae_int_t i;
21881     ae_int_t j;
21882     ae_vector v;
21883     ae_vector work;
21884 
21885     ae_frame_make(_state, &_frame_block);
21886     memset(&v, 0, sizeof(v));
21887     memset(&work, 0, sizeof(work));
21888     ae_matrix_clear(h);
21889     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
21890     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
21891 
21892     if( n==0 )
21893     {
21894         ae_frame_leave(_state);
21895         return;
21896     }
21897     ae_matrix_set_length(h, n-1+1, n-1+1, _state);
21898     for(i=0; i<=n-1; i++)
21899     {
21900         for(j=0; j<=i-2; j++)
21901         {
21902             h->ptr.pp_double[i][j] = (double)(0);
21903         }
21904         j = ae_maxint(0, i-1, _state);
21905         ae_v_move(&h->ptr.pp_double[i][j], 1, &a->ptr.pp_double[i][j], 1, ae_v_len(j,n-1));
21906     }
21907     ae_frame_leave(_state);
21908 }
21909 
21910 
21911 /*************************************************************************
21912 Reduction of a symmetric matrix which is given by its higher or lower
21913 triangular part to a tridiagonal matrix using orthogonal similarity
21914 transformation: Q'*A*Q=T.
21915 
21916   ! COMMERCIAL EDITION OF ALGLIB:
21917   !
21918   ! Commercial Edition of ALGLIB includes following important improvements
21919   ! of this function:
21920   ! * high-performance native backend with same C# interface (C# version)
21921   ! * hardware vendor (Intel) implementations of linear algebra primitives
21922   !   (C++ and C# versions, x86/x64 platform)
21923   !
21924   ! We recommend you to read 'Working with commercial version' section  of
21925   ! ALGLIB Reference Manual in order to find out how to  use  performance-
21926   ! related features provided by commercial edition of ALGLIB.
21927 
21928 Input parameters:
21929     A       -   matrix to be transformed
21930                 array with elements [0..N-1, 0..N-1].
21931     N       -   size of matrix A.
21932     IsUpper -   storage format. If IsUpper = True, then matrix A is given
21933                 by its upper triangle, and the lower triangle is not used
21934                 and not modified by the algorithm, and vice versa
21935                 if IsUpper = False.
21936 
21937 Output parameters:
21938     A       -   matrices T and Q in  compact form (see lower)
21939     Tau     -   array of factors which are forming matrices H(i)
21940                 array with elements [0..N-2].
21941     D       -   main diagonal of symmetric matrix T.
21942                 array with elements [0..N-1].
21943     E       -   secondary diagonal of symmetric matrix T.
21944                 array with elements [0..N-2].
21945 
21946 
21947   If IsUpper=True, the matrix Q is represented as a product of elementary
21948   reflectors
21949 
21950      Q = H(n-2) . . . H(1) H(0).
21951 
21952   Each H(i) has the form
21953 
21954      H(i) = I - tau * v * v'
21955 
21956   where tau is a real scalar, and v is a real vector with
21957   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
21958   A(0:i-1,i+1), and tau in TAU(i).
21959 
21960   If IsUpper=False, the matrix Q is represented as a product of elementary
21961   reflectors
21962 
21963      Q = H(0) H(1) . . . H(n-2).
21964 
21965   Each H(i) has the form
21966 
21967      H(i) = I - tau * v * v'
21968 
21969   where tau is a real scalar, and v is a real vector with
21970   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
21971   and tau in TAU(i).
21972 
21973   The contents of A on exit are illustrated by the following examples
21974   with n = 5:
21975 
21976   if UPLO = 'U':                       if UPLO = 'L':
21977 
21978     (  d   e   v1  v2  v3 )              (  d                  )
21979     (      d   e   v2  v3 )              (  e   d              )
21980     (          d   e   v3 )              (  v0  e   d          )
21981     (              d   e  )              (  v0  v1  e   d      )
21982     (                  d  )              (  v0  v1  v2  e   d  )
21983 
21984   where d and e denote diagonal and off-diagonal elements of T, and vi
21985   denotes an element of the vector defining H(i).
21986 
21987   -- LAPACK routine (version 3.0) --
21988      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
21989      Courant Institute, Argonne National Lab, and Rice University
21990      October 31, 1992
21991 *************************************************************************/
void smatrixtd(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Real    */ ae_vector* tau,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    double alpha;               /* coefficient -1/2*tau*(x'*v) of the w correction */
    double taui;                /* scalar factor of the current reflector */
    double v;                   /* dot product x'*v */
    ae_vector t;                /* 1-based copy of the reflector vector */
    ae_vector t2;               /* scratch for matrix-vector products / rank-2 update */
    ae_vector t3;               /* scratch for matrix-vector products / rank-2 update */

    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    memset(&t2, 0, sizeof(t2));
    memset(&t3, 0, sizeof(t3));
    ae_vector_clear(tau);
    ae_vector_clear(d);
    ae_vector_clear(e);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&t3, 0, DT_REAL, _state, ae_true);

    if( n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&t, n+1, _state);
    ae_vector_set_length(&t2, n+1, _state);
    ae_vector_set_length(&t3, n+1, _state);
    /* Tau and E have N-1 elements and only exist for N>1; D always has N */
    if( n>1 )
    {
        ae_vector_set_length(tau, n-2+1, _state);
    }
    ae_vector_set_length(d, n-1+1, _state);
    if( n>1 )
    {
        ae_vector_set_length(e, n-2+1, _state);
    }

    /*
     * Try to use MKL
     */
    if( smatrixtdmkl(a, n, isupper, tau, d, e, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version
     */
    if( isupper )
    {

        /*
         * Reduce the upper triangle of A, processing columns right-to-left
         */
        for(i=n-2; i>=0; i--)
        {

            /*
             * Generate elementary reflector H() = E - tau * v * v'
             * from column i+1 above the diagonal (copied into 1-based T)
             */
            if( i>=1 )
            {
                ae_v_move(&t.ptr.p_double[2], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(2,i+1));
            }
            t.ptr.p_double[1] = a->ptr.pp_double[i][i+1];
            generatereflection(&t, i+1, &taui, _state);
            if( i>=1 )
            {
                ae_v_move(&a->ptr.pp_double[0][i+1], a->stride, &t.ptr.p_double[2], 1, ae_v_len(0,i-1));
            }
            a->ptr.pp_double[i][i+1] = t.ptr.p_double[1];
            e->ptr.p_double[i] = a->ptr.pp_double[i][i+1];
            if( ae_fp_neq(taui,(double)(0)) )
            {

                /*
                 * Apply H from both sides to A
                 * (temporarily set the implicit v(i)=1 element)
                 */
                a->ptr.pp_double[i][i+1] = (double)(1);

                /*
                 * Compute  x := tau * A * v  storing x in TAU
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
                symmetricmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t3, _state);
                ae_v_move(&tau->ptr.p_double[0], 1, &t3.ptr.p_double[1], 1, ae_v_len(0,i));

                /*
                 * Compute  w := x - 1/2 * tau * (x'*v) * v
                 */
                v = ae_v_dotproduct(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i));
                alpha = -0.5*taui*v;
                ae_v_addd(&tau->ptr.p_double[0], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(0,i), alpha);

                /*
                 * Apply the transformation as a rank-2 update:
                 *    A := A - v * w' - w * v'
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
                ae_v_move(&t3.ptr.p_double[1], 1, &tau->ptr.p_double[0], 1, ae_v_len(1,i+1));
                symmetricrank2update(a, isupper, 0, i, &t, &t3, &t2, (double)(-1), _state);
                a->ptr.pp_double[i][i+1] = e->ptr.p_double[i];
            }
            d->ptr.p_double[i+1] = a->ptr.pp_double[i+1][i+1];
            tau->ptr.p_double[i] = taui;
        }
        d->ptr.p_double[0] = a->ptr.pp_double[0][0];
    }
    else
    {

        /*
         * Reduce the lower triangle of A, processing columns left-to-right
         */
        for(i=0; i<=n-2; i++)
        {

            /*
             * Generate elementary reflector H = E - tau * v * v'
             * from column i below the diagonal (copied into 1-based T)
             */
            ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
            generatereflection(&t, n-i-1, &taui, _state);
            ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &t.ptr.p_double[1], 1, ae_v_len(i+1,n-1));
            e->ptr.p_double[i] = a->ptr.pp_double[i+1][i];
            if( ae_fp_neq(taui,(double)(0)) )
            {

                /*
                 * Apply H from both sides to A
                 * (temporarily set the implicit v(i+1)=1 element)
                 */
                a->ptr.pp_double[i+1][i] = (double)(1);

                /*
                 * Compute  x := tau * A * v  storing y in TAU
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
                symmetricmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state);
                ae_v_move(&tau->ptr.p_double[i], 1, &t2.ptr.p_double[1], 1, ae_v_len(i,n-2));

                /*
                 * Compute  w := x - 1/2 * tau * (x'*v) * v
                 */
                v = ae_v_dotproduct(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2));
                alpha = -0.5*taui*v;
                ae_v_addd(&tau->ptr.p_double[i], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(i,n-2), alpha);

                /*
                 * Apply the transformation as a rank-2 update:
                 *     A := A - v * w' - w * v'
                 *
                 */
                ae_v_move(&t.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
                ae_v_move(&t2.ptr.p_double[1], 1, &tau->ptr.p_double[i], 1, ae_v_len(1,n-i-1));
                symmetricrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, (double)(-1), _state);
                a->ptr.pp_double[i+1][i] = e->ptr.p_double[i];
            }
            d->ptr.p_double[i] = a->ptr.pp_double[i][i];
            tau->ptr.p_double[i] = taui;
        }
        d->ptr.p_double[n-1] = a->ptr.pp_double[n-1][n-1];
    }
    ae_frame_leave(_state);
}
22165 
22166 
22167 /*************************************************************************
22168 Unpacking matrix Q which reduces symmetric matrix to a tridiagonal
22169 form.
22170 
22171   ! COMMERCIAL EDITION OF ALGLIB:
22172   !
22173   ! Commercial Edition of ALGLIB includes following important improvements
22174   ! of this function:
22175   ! * high-performance native backend with same C# interface (C# version)
22176   ! * hardware vendor (Intel) implementations of linear algebra primitives
22177   !   (C++ and C# versions, x86/x64 platform)
22178   !
22179   ! We recommend you to read 'Working with commercial version' section  of
22180   ! ALGLIB Reference Manual in order to find out how to  use  performance-
22181   ! related features provided by commercial edition of ALGLIB.
22182 
22183 Input parameters:
22184     A       -   the result of a SMatrixTD subroutine
22185     N       -   size of matrix A.
22186     IsUpper -   storage format (a parameter of SMatrixTD subroutine)
22187     Tau     -   the result of a SMatrixTD subroutine
22188 
22189 Output parameters:
22190     Q       -   transformation matrix.
22191                 array with elements [0..N-1, 0..N-1].
22192 
22193   -- ALGLIB --
22194      Copyright 2005-2010 by Bochkanov Sergey
22195 *************************************************************************/
smatrixtdunpackq(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_matrix * q,ae_state * _state)22196 void smatrixtdunpackq(/* Real    */ ae_matrix* a,
22197      ae_int_t n,
22198      ae_bool isupper,
22199      /* Real    */ ae_vector* tau,
22200      /* Real    */ ae_matrix* q,
22201      ae_state *_state)
22202 {
22203     ae_frame _frame_block;
22204     ae_int_t i;
22205     ae_int_t j;
22206     ae_vector v;
22207     ae_vector work;
22208 
22209     ae_frame_make(_state, &_frame_block);
22210     memset(&v, 0, sizeof(v));
22211     memset(&work, 0, sizeof(work));
22212     ae_matrix_clear(q);
22213     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
22214     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
22215 
22216     if( n==0 )
22217     {
22218         ae_frame_leave(_state);
22219         return;
22220     }
22221 
22222     /*
22223      * init
22224      */
22225     ae_matrix_set_length(q, n-1+1, n-1+1, _state);
22226     ae_vector_set_length(&v, n+1, _state);
22227     ae_vector_set_length(&work, n-1+1, _state);
22228     for(i=0; i<=n-1; i++)
22229     {
22230         for(j=0; j<=n-1; j++)
22231         {
22232             if( i==j )
22233             {
22234                 q->ptr.pp_double[i][j] = (double)(1);
22235             }
22236             else
22237             {
22238                 q->ptr.pp_double[i][j] = (double)(0);
22239             }
22240         }
22241     }
22242 
22243     /*
22244      * MKL version
22245      */
22246     if( smatrixtdunpackqmkl(a, n, isupper, tau, q, _state) )
22247     {
22248         ae_frame_leave(_state);
22249         return;
22250     }
22251 
22252     /*
22253      * ALGLIB version: unpack Q
22254      */
22255     if( isupper )
22256     {
22257         for(i=0; i<=n-2; i++)
22258         {
22259 
22260             /*
22261              * Apply H(i)
22262              */
22263             ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[0][i+1], a->stride, ae_v_len(1,i+1));
22264             v.ptr.p_double[i+1] = (double)(1);
22265             applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, 0, i, 0, n-1, &work, _state);
22266         }
22267     }
22268     else
22269     {
22270         for(i=n-2; i>=0; i--)
22271         {
22272 
22273             /*
22274              * Apply H(i)
22275              */
22276             ae_v_move(&v.ptr.p_double[1], 1, &a->ptr.pp_double[i+1][i], a->stride, ae_v_len(1,n-i-1));
22277             v.ptr.p_double[1] = (double)(1);
22278             applyreflectionfromtheleft(q, tau->ptr.p_double[i], &v, i+1, n-1, 0, n-1, &work, _state);
22279         }
22280     }
22281     ae_frame_leave(_state);
22282 }
22283 
22284 
22285 /*************************************************************************
22286 Reduction of a Hermitian matrix which is given  by  its  higher  or  lower
22287 triangular part to a real  tridiagonal  matrix  using  unitary  similarity
22288 transformation: Q'*A*Q = T.
22289 
22290   ! COMMERCIAL EDITION OF ALGLIB:
22291   !
22292   ! Commercial Edition of ALGLIB includes following important improvements
22293   ! of this function:
22294   ! * high-performance native backend with same C# interface (C# version)
22295   ! * hardware vendor (Intel) implementations of linear algebra primitives
22296   !   (C++ and C# versions, x86/x64 platform)
22297   !
22298   ! We recommend you to read 'Working with commercial version' section  of
22299   ! ALGLIB Reference Manual in order to find out how to  use  performance-
22300   ! related features provided by commercial edition of ALGLIB.
22301 
22302 Input parameters:
22303     A       -   matrix to be transformed
22304                 array with elements [0..N-1, 0..N-1].
22305     N       -   size of matrix A.
22306     IsUpper -   storage format. If IsUpper = True, then matrix A is  given
22307                 by its upper triangle, and the lower triangle is not  used
22308                 and not modified by the algorithm, and vice versa
22309                 if IsUpper = False.
22310 
22311 Output parameters:
22312     A       -   matrices T and Q in  compact form (see lower)
22313     Tau     -   array of factors which are forming matrices H(i)
22314                 array with elements [0..N-2].
22315     D       -   main diagonal of real symmetric matrix T.
22316                 array with elements [0..N-1].
22317     E       -   secondary diagonal of real symmetric matrix T.
22318                 array with elements [0..N-2].
22319 
22320 
22321   If IsUpper=True, the matrix Q is represented as a product of elementary
22322   reflectors
22323 
22324      Q = H(n-2) . . . H(1) H(0).
22325 
22326   Each H(i) has the form
22327 
22328      H(i) = I - tau * v * v'
22329 
22330   where tau is a complex scalar, and v is a complex vector with
22331   v(i+1:n-1) = 0, v(i) = 1, v(0:i-1) is stored on exit in
22332   A(0:i-1,i+1), and tau in TAU(i).
22333 
22334   If IsUpper=False, the matrix Q is represented as a product of elementary
22335   reflectors
22336 
22337      Q = H(0) H(1) . . . H(n-2).
22338 
22339   Each H(i) has the form
22340 
22341      H(i) = I - tau * v * v'
22342 
22343   where tau is a complex scalar, and v is a complex vector with
22344   v(0:i) = 0, v(i+1) = 1, v(i+2:n-1) is stored on exit in A(i+2:n-1,i),
22345   and tau in TAU(i).
22346 
22347   The contents of A on exit are illustrated by the following examples
22348   with n = 5:
22349 
22350   if UPLO = 'U':                       if UPLO = 'L':
22351 
22352     (  d   e   v1  v2  v3 )              (  d                  )
22353     (      d   e   v2  v3 )              (  e   d              )
22354     (          d   e   v3 )              (  v0  e   d          )
22355     (              d   e  )              (  v0  v1  e   d      )
22356     (                  d  )              (  v0  v1  v2  e   d  )
22357 
22358 where d and e denote diagonal and off-diagonal elements of T, and vi
22359 denotes an element of the vector defining H(i).
22360 
22361   -- LAPACK routine (version 3.0) --
22362      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22363      Courant Institute, Argonne National Lab, and Rice University
22364      October 31, 1992
22365 *************************************************************************/
hmatrixtd(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_vector * d,ae_vector * e,ae_state * _state)22366 void hmatrixtd(/* Complex */ ae_matrix* a,
22367      ae_int_t n,
22368      ae_bool isupper,
22369      /* Complex */ ae_vector* tau,
22370      /* Real    */ ae_vector* d,
22371      /* Real    */ ae_vector* e,
22372      ae_state *_state)
22373 {
22374     ae_frame _frame_block;
22375     ae_int_t i;
22376     ae_complex alpha;
22377     ae_complex taui;
22378     ae_complex v;
22379     ae_vector t;
22380     ae_vector t2;
22381     ae_vector t3;
22382 
22383     ae_frame_make(_state, &_frame_block);
22384     memset(&t, 0, sizeof(t));
22385     memset(&t2, 0, sizeof(t2));
22386     memset(&t3, 0, sizeof(t3));
22387     ae_vector_clear(tau);
22388     ae_vector_clear(d);
22389     ae_vector_clear(e);
22390     ae_vector_init(&t, 0, DT_COMPLEX, _state, ae_true);
22391     ae_vector_init(&t2, 0, DT_COMPLEX, _state, ae_true);
22392     ae_vector_init(&t3, 0, DT_COMPLEX, _state, ae_true);
22393 
22394 
22395     /*
22396      * Init and test
22397      */
22398     if( n<=0 )
22399     {
22400         ae_frame_leave(_state);
22401         return;
22402     }
22403     for(i=0; i<=n-1; i++)
22404     {
22405         ae_assert(ae_fp_eq(a->ptr.pp_complex[i][i].y,(double)(0)), "Assertion failed", _state);
22406     }
22407     if( n>1 )
22408     {
22409         ae_vector_set_length(tau, n-2+1, _state);
22410         ae_vector_set_length(e, n-2+1, _state);
22411     }
22412     ae_vector_set_length(d, n-1+1, _state);
22413     ae_vector_set_length(&t, n-1+1, _state);
22414     ae_vector_set_length(&t2, n-1+1, _state);
22415     ae_vector_set_length(&t3, n-1+1, _state);
22416 
22417     /*
22418      * MKL version
22419      */
22420     if( hmatrixtdmkl(a, n, isupper, tau, d, e, _state) )
22421     {
22422         ae_frame_leave(_state);
22423         return;
22424     }
22425 
22426     /*
22427      * ALGLIB version
22428      */
22429     if( isupper )
22430     {
22431 
22432         /*
22433          * Reduce the upper triangle of A
22434          */
22435         a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(a->ptr.pp_complex[n-1][n-1].x);
22436         for(i=n-2; i>=0; i--)
22437         {
22438 
22439             /*
22440              * Generate elementary reflector H = I+1 - tau * v * v'
22441              */
22442             alpha = a->ptr.pp_complex[i][i+1];
22443             t.ptr.p_complex[1] = alpha;
22444             if( i>=1 )
22445             {
22446                 ae_v_cmove(&t.ptr.p_complex[2], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(2,i+1));
22447             }
22448             complexgeneratereflection(&t, i+1, &taui, _state);
22449             if( i>=1 )
22450             {
22451                 ae_v_cmove(&a->ptr.pp_complex[0][i+1], a->stride, &t.ptr.p_complex[2], 1, "N", ae_v_len(0,i-1));
22452             }
22453             alpha = t.ptr.p_complex[1];
22454             e->ptr.p_double[i] = alpha.x;
22455             if( ae_c_neq_d(taui,(double)(0)) )
22456             {
22457 
22458                 /*
22459                  * Apply H(I+1) from both sides to A
22460                  */
22461                 a->ptr.pp_complex[i][i+1] = ae_complex_from_i(1);
22462 
22463                 /*
22464                  * Compute  x := tau * A * v  storing x in TAU
22465                  */
22466                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
22467                 hermitianmatrixvectormultiply(a, isupper, 0, i, &t, taui, &t2, _state);
22468                 ae_v_cmove(&tau->ptr.p_complex[0], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(0,i));
22469 
22470                 /*
22471                  * Compute  w := x - 1/2 * tau * (x'*v) * v
22472                  */
22473                 v = ae_v_cdotproduct(&tau->ptr.p_complex[0], 1, "Conj", &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i));
22474                 alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v));
22475                 ae_v_caddc(&tau->ptr.p_complex[0], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(0,i), alpha);
22476 
22477                 /*
22478                  * Apply the transformation as a rank-2 update:
22479                  *    A := A - v * w' - w * v'
22480                  */
22481                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
22482                 ae_v_cmove(&t3.ptr.p_complex[1], 1, &tau->ptr.p_complex[0], 1, "N", ae_v_len(1,i+1));
22483                 hermitianrank2update(a, isupper, 0, i, &t, &t3, &t2, ae_complex_from_i(-1), _state);
22484             }
22485             else
22486             {
22487                 a->ptr.pp_complex[i][i] = ae_complex_from_d(a->ptr.pp_complex[i][i].x);
22488             }
22489             a->ptr.pp_complex[i][i+1] = ae_complex_from_d(e->ptr.p_double[i]);
22490             d->ptr.p_double[i+1] = a->ptr.pp_complex[i+1][i+1].x;
22491             tau->ptr.p_complex[i] = taui;
22492         }
22493         d->ptr.p_double[0] = a->ptr.pp_complex[0][0].x;
22494     }
22495     else
22496     {
22497 
22498         /*
22499          * Reduce the lower triangle of A
22500          */
22501         a->ptr.pp_complex[0][0] = ae_complex_from_d(a->ptr.pp_complex[0][0].x);
22502         for(i=0; i<=n-2; i++)
22503         {
22504 
22505             /*
22506              * Generate elementary reflector H = I - tau * v * v'
22507              */
22508             ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
22509             complexgeneratereflection(&t, n-i-1, &taui, _state);
22510             ae_v_cmove(&a->ptr.pp_complex[i+1][i], a->stride, &t.ptr.p_complex[1], 1, "N", ae_v_len(i+1,n-1));
22511             e->ptr.p_double[i] = a->ptr.pp_complex[i+1][i].x;
22512             if( ae_c_neq_d(taui,(double)(0)) )
22513             {
22514 
22515                 /*
22516                  * Apply H(i) from both sides to A(i+1:n,i+1:n)
22517                  */
22518                 a->ptr.pp_complex[i+1][i] = ae_complex_from_i(1);
22519 
22520                 /*
22521                  * Compute  x := tau * A * v  storing y in TAU
22522                  */
22523                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
22524                 hermitianmatrixvectormultiply(a, isupper, i+1, n-1, &t, taui, &t2, _state);
22525                 ae_v_cmove(&tau->ptr.p_complex[i], 1, &t2.ptr.p_complex[1], 1, "N", ae_v_len(i,n-2));
22526 
22527                 /*
22528                  * Compute  w := x - 1/2 * tau * (x'*v) * v
22529                  */
22530                 v = ae_v_cdotproduct(&tau->ptr.p_complex[i], 1, "Conj", &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2));
22531                 alpha = ae_c_neg(ae_c_mul(ae_c_mul_d(taui,0.5),v));
22532                 ae_v_caddc(&tau->ptr.p_complex[i], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(i,n-2), alpha);
22533 
22534                 /*
22535                  * Apply the transformation as a rank-2 update:
22536                  * A := A - v * w' - w * v'
22537                  */
22538                 ae_v_cmove(&t.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
22539                 ae_v_cmove(&t2.ptr.p_complex[1], 1, &tau->ptr.p_complex[i], 1, "N", ae_v_len(1,n-i-1));
22540                 hermitianrank2update(a, isupper, i+1, n-1, &t, &t2, &t3, ae_complex_from_i(-1), _state);
22541             }
22542             else
22543             {
22544                 a->ptr.pp_complex[i+1][i+1] = ae_complex_from_d(a->ptr.pp_complex[i+1][i+1].x);
22545             }
22546             a->ptr.pp_complex[i+1][i] = ae_complex_from_d(e->ptr.p_double[i]);
22547             d->ptr.p_double[i] = a->ptr.pp_complex[i][i].x;
22548             tau->ptr.p_complex[i] = taui;
22549         }
22550         d->ptr.p_double[n-1] = a->ptr.pp_complex[n-1][n-1].x;
22551     }
22552     ae_frame_leave(_state);
22553 }
22554 
22555 
22556 /*************************************************************************
22557 Unpacking matrix Q which reduces a Hermitian matrix to a real  tridiagonal
22558 form.
22559 
22560   ! COMMERCIAL EDITION OF ALGLIB:
22561   !
22562   ! Commercial Edition of ALGLIB includes following important improvements
22563   ! of this function:
22564   ! * high-performance native backend with same C# interface (C# version)
22565   ! * hardware vendor (Intel) implementations of linear algebra primitives
22566   !   (C++ and C# versions, x86/x64 platform)
22567   !
22568   ! We recommend you to read 'Working with commercial version' section  of
22569   ! ALGLIB Reference Manual in order to find out how to  use  performance-
22570   ! related features provided by commercial edition of ALGLIB.
22571 
22572 Input parameters:
22573     A       -   the result of a HMatrixTD subroutine
22574     N       -   size of matrix A.
22575     IsUpper -   storage format (a parameter of HMatrixTD subroutine)
22576     Tau     -   the result of a HMatrixTD subroutine
22577 
22578 Output parameters:
22579     Q       -   transformation matrix.
22580                 array with elements [0..N-1, 0..N-1].
22581 
22582   -- ALGLIB --
22583      Copyright 2005-2010 by Bochkanov Sergey
22584 *************************************************************************/
void hmatrixtdunpackq(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Complex */ ae_vector* tau,
     /* Complex */ ae_matrix* q,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector v;                /* current elementary reflector (1-based storage) */
    ae_vector work;             /* scratch buffer used by the reflection routine  */

    ae_frame_make(_state, &_frame_block);
    memset(&v, 0, sizeof(v));
    memset(&work, 0, sizeof(work));
    ae_matrix_clear(q);
    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);

    /*
     * Degenerate case: empty matrix, nothing to unpack
     */
    if( n==0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * init
     */
    ae_matrix_set_length(q, n-1+1, n-1+1, _state);
    ae_vector_set_length(&v, n+1, _state);
    ae_vector_set_length(&work, n-1+1, _state);

    /*
     * MKL version (returns ae_true and fills Q when the MKL backend is
     * available; otherwise falls through to the generic code below)
     */
    if( hmatrixtdunpackqmkl(a, n, isupper, tau, q, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version: start from the identity and accumulate the
     * elementary reflectors stored in A/Tau by HMatrixTD.
     */
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            if( i==j )
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(1);
            }
            else
            {
                q->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
    }
    if( isupper )
    {
        /*
         * Upper storage: reflector I lives in column I+1, rows 0..I.
         * Reflectors are applied in forward order, each acting on
         * rows 0..I of Q (all columns).
         */
        for(i=0; i<=n-2; i++)
        {

            /*
             * Apply H(i): copy A(0:I, I+1) into V[1..I+1] and overwrite
             * the last element with the implicit unit entry.
             */
            ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[0][i+1], a->stride, "N", ae_v_len(1,i+1));
            v.ptr.p_complex[i+1] = ae_complex_from_i(1);
            complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, 0, i, 0, n-1, &work, _state);
        }
    }
    else
    {
        /*
         * Lower storage: reflector I lives in column I, rows I+1..N-1.
         * Reflectors are applied in reverse order, each acting on
         * rows I+1..N-1 of Q (all columns).
         */
        for(i=n-2; i>=0; i--)
        {

            /*
             * Apply H(i): copy A(I+1:N-1, I) into V[1..N-I-1]; the first
             * element is the implicit unit entry.
             */
            ae_v_cmove(&v.ptr.p_complex[1], 1, &a->ptr.pp_complex[i+1][i], a->stride, "N", ae_v_len(1,n-i-1));
            v.ptr.p_complex[1] = ae_complex_from_i(1);
            complexapplyreflectionfromtheleft(q, tau->ptr.p_complex[i], &v, i+1, n-1, 0, n-1, &work, _state);
        }
    }
    ae_frame_leave(_state);
}
22672 
22673 
22674 /*************************************************************************
22675 Base case for complex QR
22676 
22677   -- LAPACK routine (version 3.0) --
22678      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22679      Courant Institute, Argonne National Lab, and Rice University
22680      September 30, 1994.
22681      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
22682      pseudocode, 2007-2010.
22683 *************************************************************************/
static void ortfac_cmatrixqrbasecase(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* work,
     /* Complex */ ae_vector* t,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t mmi;
    ae_int_t minmn;
    ae_complex tmp;


    minmn = ae_minint(m, n, _state);
    if( minmn<=0 )
    {
        return;
    }

    /*
     * Reduce columns 0..MinMN-1 one by one; column I is annihilated
     * below the diagonal by the elementary reflector H(I).
     *
     * (Fix: the old code recomputed min(M,N) here with a second
     * ae_minint() call; reuse the MinMN value computed above.)
     */
    k = minmn;
    for(i=0; i<=k-1; i++)
    {

        /*
         * Generate elementary reflector H(i) to annihilate A(i+1:m,i).
         * T uses 1-based storage: the column tail A(i:m-1,i) is copied
         * into T[1..MMI] before the reflector is generated, and the
         * transformed column is copied back afterwards.
         */
        mmi = m-i;
        ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], a->stride, "N", ae_v_len(1,mmi));
        complexgeneratereflection(t, mmi, &tmp, _state);
        tau->ptr.p_complex[i] = tmp;
        ae_v_cmove(&a->ptr.pp_complex[i][i], a->stride, &t->ptr.p_complex[1], 1, "N", ae_v_len(i,m-1));
        t->ptr.p_complex[1] = ae_complex_from_i(1);
        if( i<n-1 )
        {

            /*
             * Apply H'(i) to A(i:m,i+1:n) from the left
             */
            complexapplyreflectionfromtheleft(a, ae_c_conj(tau->ptr.p_complex[i], _state), t, i, m-1, i+1, n-1, work, _state);
        }
    }
}
22731 
22732 
22733 /*************************************************************************
22734 Base case for complex LQ
22735 
22736   -- LAPACK routine (version 3.0) --
22737      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
22738      Courant Institute, Argonne National Lab, and Rice University
22739      September 30, 1994.
22740      Sergey Bochkanov, ALGLIB project, translation from FORTRAN to
22741      pseudocode, 2007-2010.
22742 *************************************************************************/
static void ortfac_cmatrixlqbasecase(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     /* Complex */ ae_vector* work,
     /* Complex */ ae_vector* t,
     /* Complex */ ae_vector* tau,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t minmn;
    ae_complex tmp;


    minmn = ae_minint(m, n, _state);
    if( minmn<=0 )
    {
        return;
    }

    /*
     * Reduce rows 0..MinMN-1 one by one; row I is annihilated to the
     * right of the diagonal by the (right) elementary reflector H(I).
     */
    for(i=0; i<=minmn-1; i++)
    {

        /*
         * Generate elementary reflector H(i)
         *
         * NOTE: ComplexGenerateReflection() generates left reflector,
         * i.e. H which reduces x by applying from the left, but we
         * need RIGHT reflector. So we replace H=E-tau*v*v' by H^H,
         * which changes v to conj(v).
         */
        ae_v_cmove(&t->ptr.p_complex[1], 1, &a->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(1,n-i));
        complexgeneratereflection(t, n-i, &tmp, _state);
        tau->ptr.p_complex[i] = tmp;
        /* store conj(v) back into the row and set the implicit unit entry */
        ae_v_cmove(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[1], 1, "Conj", ae_v_len(i,n-1));
        t->ptr.p_complex[1] = ae_complex_from_i(1);
        if( i<m-1 )
        {

            /*
             * Apply H'(i) to A(i+1:m, i:n) from the right
             */
            complexapplyreflectionfromtheright(a, tau->ptr.p_complex[i], t, i+1, m-1, i, n-1, work, _state);
        }
    }
}
22791 
22792 
22793 /*************************************************************************
22794 Generate block reflector:
22795 * fill unused parts of reflectors matrix by zeros
22796 * fill diagonal of reflectors matrix by ones
22797 * generate triangular factor T
22798 
22799 PARAMETERS:
22800     A           -   either LengthA*BlockSize (if ColumnwiseA) or
22801                     BlockSize*LengthA (if not ColumnwiseA) matrix of
22802                     elementary reflectors.
22803                     Modified on exit.
22804     Tau         -   scalar factors
22805     ColumnwiseA -   reflectors are stored in rows or in columns
22806     LengthA     -   length of largest reflector
22807     BlockSize   -   number of reflectors
22808     T           -   array[BlockSize,2*BlockSize]. Left BlockSize*BlockSize
22809                     submatrix stores triangular factor on exit.
22810     WORK        -   array[BlockSize]
22811 
22812   -- ALGLIB routine --
22813      17.02.2010
22814      Bochkanov Sergey
22815 *************************************************************************/
static void ortfac_rmatrixblockreflector(/* Real    */ ae_matrix* a,
     /* Real    */ ae_vector* tau,
     ae_bool columnwisea,
     ae_int_t lengtha,
     ae_int_t blocksize,
     /* Real    */ ae_matrix* t,
     /* Real    */ ae_vector* work,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;



    /*
     * Normalize the stored reflectors into the Y factor:
     * fill beginning of new column with zeros,
     * load 1.0 in the first non-zero element
     */
    for(k=0; k<=blocksize-1; k++)
    {
        if( columnwisea )
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_double[i][k] = (double)(0);
            }
        }
        else
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_double[k][i] = (double)(0);
            }
        }
        a->ptr.pp_double[k][k] = (double)(1);
    }

    /*
     * Calculate Gram matrix of A.
     *
     * The right half of T (columns BlockSize..2*BlockSize-1) is used as
     * scratch: after this loop T[j][BlockSize+i] holds the dot product of
     * reflectors J and I (I<J), accumulated over all LengthA elements.
     * Zero reflector elements are skipped to save work.
     */
    for(i=0; i<=blocksize-1; i++)
    {
        for(j=0; j<=blocksize-1; j++)
        {
            t->ptr.pp_double[i][blocksize+j] = (double)(0);
        }
    }
    for(k=0; k<=lengtha-1; k++)
    {
        for(j=1; j<=blocksize-1; j++)
        {
            if( columnwisea )
            {
                v = a->ptr.pp_double[k][j];
                if( ae_fp_neq(v,(double)(0)) )
                {
                    ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[k][0], 1, ae_v_len(blocksize,blocksize+j-1), v);
                }
            }
            else
            {
                v = a->ptr.pp_double[j][k];
                if( ae_fp_neq(v,(double)(0)) )
                {
                    ae_v_addd(&t->ptr.pp_double[j][blocksize], 1, &a->ptr.pp_double[0][k], a->stride, ae_v_len(blocksize,blocksize+j-1), v);
                }
            }
        }
    }

    /*
     * Prepare Y (stored in TmpA) and T (stored in TmpT)
     */
    for(k=0; k<=blocksize-1; k++)
    {

        /*
         * fill non-zero part of T, use pre-calculated Gram matrix:
         * column K of the triangular factor is
         *     T[i][k] = -Tau[k] * sum(T[i][i..k-1]*G[k][i..k-1]),  i<k
         * with -Tau[k] on the diagonal.
         */
        ae_v_move(&work->ptr.p_double[0], 1, &t->ptr.pp_double[k][blocksize], 1, ae_v_len(0,k-1));
        for(i=0; i<=k-1; i++)
        {
            v = ae_v_dotproduct(&t->ptr.pp_double[i][i], 1, &work->ptr.p_double[i], 1, ae_v_len(i,k-1));
            t->ptr.pp_double[i][k] = -tau->ptr.p_double[k]*v;
        }
        t->ptr.pp_double[k][k] = -tau->ptr.p_double[k];

        /*
         * Rest of T is filled by zeros
         */
        for(i=k+1; i<=blocksize-1; i++)
        {
            t->ptr.pp_double[i][k] = (double)(0);
        }
    }
}
22914 
22915 
22916 /*************************************************************************
22917 Generate block reflector (complex):
22918 * fill unused parts of reflectors matrix by zeros
22919 * fill diagonal of reflectors matrix by ones
22920 * generate triangular factor T
22921 
22922 
22923   -- ALGLIB routine --
22924      17.02.2010
22925      Bochkanov Sergey
22926 *************************************************************************/
static void ortfac_cmatrixblockreflector(/* Complex */ ae_matrix* a,
     /* Complex */ ae_vector* tau,
     ae_bool columnwisea,
     ae_int_t lengtha,
     ae_int_t blocksize,
     /* Complex */ ae_matrix* t,
     /* Complex */ ae_vector* work,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_complex v;



    /*
     * Prepare Y (stored in TmpA) and T (stored in TmpT).
     *
     * Unlike the real version, the Gram-like inner products are computed
     * on the fly for each new reflector K instead of being pre-calculated.
     */
    for(k=0; k<=blocksize-1; k++)
    {

        /*
         * fill beginning of new column with zeros,
         * load 1.0 in the first non-zero element
         */
        if( columnwisea )
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_complex[i][k] = ae_complex_from_i(0);
            }
        }
        else
        {
            for(i=0; i<=k-1; i++)
            {
                a->ptr.pp_complex[k][i] = ae_complex_from_i(0);
            }
        }
        a->ptr.pp_complex[k][k] = ae_complex_from_i(1);

        /*
         * fill non-zero part of T:
         * WORK[i] receives the (conjugated) inner product of reflector I
         * with the new reflector K over elements K..LengthA-1; the side
         * carrying the conjugation depends on the storage orientation.
         */
        for(i=0; i<=k-1; i++)
        {
            if( columnwisea )
            {
                v = ae_v_cdotproduct(&a->ptr.pp_complex[k][i], a->stride, "Conj", &a->ptr.pp_complex[k][k], a->stride, "N", ae_v_len(k,lengtha-1));
            }
            else
            {
                v = ae_v_cdotproduct(&a->ptr.pp_complex[i][k], 1, "N", &a->ptr.pp_complex[k][k], 1, "Conj", ae_v_len(k,lengtha-1));
            }
            work->ptr.p_complex[i] = v;
        }
        /*
         * Column K of the triangular factor:
         *     T[i][k] = -Tau[k] * sum(T[i][i..k-1]*WORK[i..k-1]),  i<k
         * with -Tau[k] on the diagonal.
         */
        for(i=0; i<=k-1; i++)
        {
            v = ae_v_cdotproduct(&t->ptr.pp_complex[i][i], 1, "N", &work->ptr.p_complex[i], 1, "N", ae_v_len(i,k-1));
            t->ptr.pp_complex[i][k] = ae_c_neg(ae_c_mul(tau->ptr.p_complex[k],v));
        }
        t->ptr.pp_complex[k][k] = ae_c_neg(tau->ptr.p_complex[k]);

        /*
         * Rest of T is filled by zeros
         */
        for(i=k+1; i<=blocksize-1; i++)
        {
            t->ptr.pp_complex[i][k] = ae_complex_from_i(0);
        }
    }
}
22999 
23000 
23001 #endif
23002 #if defined(AE_COMPILE_MATGEN) || !defined(AE_PARTIAL_BUILD)
23003 
23004 
23005 /*************************************************************************
23006 Generation of a random uniformly distributed (Haar) orthogonal matrix
23007 
23008 INPUT PARAMETERS:
23009     N   -   matrix size, N>=1
23010 
23011 OUTPUT PARAMETERS:
23012     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
23013 
23014 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
23015       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
23016       Application to Condition Estimators".
23017 
23018       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
23019       * takes an NxN one
23020       * takes uniformly distributed unit vector of dimension N+1.
23021       * constructs a Householder reflection from the vector, then applies
23022         it to the smaller matrix (embedded in the larger size with a 1 at
23023         the bottom right corner).
23024 
23025   -- ALGLIB routine --
23026      04.12.2009
23027      Bochkanov Sergey
23028 *************************************************************************/
void rmatrixrndorthogonal(ae_int_t n,
     /* Real    */ ae_matrix* a,
     ae_state *_state)
{
    ae_int_t row;
    ae_int_t col;

    ae_matrix_clear(a);

    ae_assert(n>=1, "RMatrixRndOrthogonal: N<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * Start from the NxN identity, then multiply it from the right by a
     * random Haar-distributed orthogonal matrix; the product is itself
     * Haar-distributed.
     */
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_double[row][col] = row==col ? (double)(1) : (double)(0);
        }
    }
    rmatrixrndorthogonalfromtheright(a, n, n, _state);
}
23056 
23057 
23058 /*************************************************************************
23059 Generation of random NxN matrix with given condition number and norm2(A)=1
23060 
23061 INPUT PARAMETERS:
23062     N   -   matrix size
23063     C   -   condition number (in 2-norm)
23064 
23065 OUTPUT PARAMETERS:
23066     A   -   random matrix with norm2(A)=1 and cond(A)=C
23067 
23068   -- ALGLIB routine --
23069      04.12.2009
23070      Bochkanov Sergey
23071 *************************************************************************/
void rmatrixrndcond(ae_int_t n,
     double c,
     /* Real    */ ae_matrix* a,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    double logsmax;             /* log of largest singular value  (=0)      */
    double logsmin;             /* log of smallest singular value (=log 1/C)*/
    hqrndstate rng;

    ae_frame_make(_state, &_frame_block);
    memset(&rng, 0, sizeof(rng));
    ae_matrix_clear(a);
    _hqrndstate_init(&rng, _state, ae_true);

    ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "RMatrixRndCond: N<1 or C<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * 1x1 case: a random sign; condition number is always 1
     */
    if( n==1 )
    {
        a->ptr.pp_double[0][0] = (double)(2*ae_randominteger(2, _state)-1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Build a diagonal matrix with singular values 1 and 1/C at the ends
     * and log-uniform values in between, then hide the diagonal structure
     * by multiplying with random orthogonal matrices from both sides
     * (orthogonal factors preserve the singular values).
     */
    hqrndrandomize(&rng, _state);
    logsmax = (double)(0);
    logsmin = ae_log(1/c, _state);
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_double[row][col] = (double)(0);
        }
    }
    a->ptr.pp_double[0][0] = ae_exp(logsmax, _state);
    for(row=1; row<=n-2; row++)
    {
        a->ptr.pp_double[row][row] = ae_exp(hqrnduniformr(&rng, _state)*(logsmin-logsmax)+logsmax, _state);
    }
    a->ptr.pp_double[n-1][n-1] = ae_exp(logsmin, _state);
    rmatrixrndorthogonalfromtheleft(a, n, n, _state);
    rmatrixrndorthogonalfromtheright(a, n, n, _state);
    ae_frame_leave(_state);
}
23121 
23122 
23123 /*************************************************************************
23124 Generation of a random Haar distributed orthogonal complex matrix
23125 
23126 INPUT PARAMETERS:
23127     N   -   matrix size, N>=1
23128 
23129 OUTPUT PARAMETERS:
23130     A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]
23131 
23132 NOTE: this function uses algorithm  described  in  Stewart, G. W.  (1980),
23133       "The Efficient Generation of  Random  Orthogonal  Matrices  with  an
23134       Application to Condition Estimators".
23135 
23136       Speaking short, to generate an (N+1)x(N+1) orthogonal matrix, it:
23137       * takes an NxN one
23138       * takes uniformly distributed unit vector of dimension N+1.
23139       * constructs a Householder reflection from the vector, then applies
23140         it to the smaller matrix (embedded in the larger size with a 1 at
23141         the bottom right corner).
23142 
23143   -- ALGLIB routine --
23144      04.12.2009
23145      Bochkanov Sergey
23146 *************************************************************************/
void cmatrixrndorthogonal(ae_int_t n,
     /* Complex */ ae_matrix* a,
     ae_state *_state)
{
    ae_int_t row;
    ae_int_t col;

    ae_matrix_clear(a);

    ae_assert(n>=1, "CMatrixRndOrthogonal: N<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * Start from the NxN identity, then multiply it from the right by a
     * random Haar-distributed unitary matrix; the product is itself
     * Haar-distributed.
     */
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_complex[row][col] = row==col ? ae_complex_from_i(1) : ae_complex_from_i(0);
        }
    }
    cmatrixrndorthogonalfromtheright(a, n, n, _state);
}
23174 
23175 
23176 /*************************************************************************
23177 Generation of random NxN complex matrix with given condition number C and
23178 norm2(A)=1
23179 
23180 INPUT PARAMETERS:
23181     N   -   matrix size
23182     C   -   condition number (in 2-norm)
23183 
23184 OUTPUT PARAMETERS:
23185     A   -   random matrix with norm2(A)=1 and cond(A)=C
23186 
23187   -- ALGLIB routine --
23188      04.12.2009
23189      Bochkanov Sergey
23190 *************************************************************************/
void cmatrixrndcond(ae_int_t n,
     double c,
     /* Complex */ ae_matrix* a,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    double logsmax;             /* log of largest singular value  (=0)      */
    double logsmin;             /* log of smallest singular value (=log 1/C)*/
    hqrndstate rng;
    ae_complex unitval;

    ae_frame_make(_state, &_frame_block);
    memset(&rng, 0, sizeof(rng));
    ae_matrix_clear(a);
    _hqrndstate_init(&rng, _state, ae_true);

    ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "CMatrixRndCond: N<1 or C<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * 1x1 case: a random point on the unit circle; condition number is 1
     */
    if( n==1 )
    {
        hqrndrandomize(&rng, _state);
        hqrndunit2(&rng, &unitval.x, &unitval.y, _state);
        a->ptr.pp_complex[0][0] = unitval;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Build a real diagonal matrix with singular values 1 and 1/C at the
     * ends and log-uniform values in between, then hide the diagonal
     * structure by multiplying with random unitary matrices from both
     * sides (unitary factors preserve the singular values).
     */
    hqrndrandomize(&rng, _state);
    logsmax = (double)(0);
    logsmin = ae_log(1/c, _state);
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_complex[row][col] = ae_complex_from_i(0);
        }
    }
    a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(logsmax, _state));
    for(row=1; row<=n-2; row++)
    {
        a->ptr.pp_complex[row][row] = ae_complex_from_d(ae_exp(hqrnduniformr(&rng, _state)*(logsmin-logsmax)+logsmax, _state));
    }
    a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(logsmin, _state));
    cmatrixrndorthogonalfromtheleft(a, n, n, _state);
    cmatrixrndorthogonalfromtheright(a, n, n, _state);
    ae_frame_leave(_state);
}
23243 
23244 
23245 /*************************************************************************
23246 Generation of random NxN symmetric matrix with given condition number  and
23247 norm2(A)=1
23248 
23249 INPUT PARAMETERS:
23250     N   -   matrix size
23251     C   -   condition number (in 2-norm)
23252 
23253 OUTPUT PARAMETERS:
23254     A   -   random matrix with norm2(A)=1 and cond(A)=C
23255 
23256   -- ALGLIB routine --
23257      04.12.2009
23258      Bochkanov Sergey
23259 *************************************************************************/
void smatrixrndcond(ae_int_t n,
     double c,
     /* Real    */ ae_matrix* a,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    double logsmax;             /* log of largest |eigenvalue|  (=0)        */
    double logsmin;             /* log of smallest |eigenvalue| (=log 1/C)  */
    hqrndstate rng;

    ae_frame_make(_state, &_frame_block);
    memset(&rng, 0, sizeof(rng));
    ae_matrix_clear(a);
    _hqrndstate_init(&rng, _state, ae_true);

    ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "SMatrixRndCond: N<1 or C<1!", _state);
    ae_matrix_set_length(a, n, n, _state);

    /*
     * 1x1 case: a random sign; condition number is always 1
     */
    if( n==1 )
    {
        a->ptr.pp_double[0][0] = (double)(2*ae_randominteger(2, _state)-1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Prepare matrix: diagonal with eigenvalues of magnitude 1 and 1/C at
     * the ends, log-uniform magnitudes with random signs in between.
     *
     * NOTE: the diagonal expression is kept as one statement because it
     * draws two random numbers; splitting it could change the order in
     * which the generator is consumed.
     */
    hqrndrandomize(&rng, _state);
    logsmax = (double)(0);
    logsmin = ae_log(1/c, _state);
    for(row=0; row<=n-1; row++)
    {
        for(col=0; col<=n-1; col++)
        {
            a->ptr.pp_double[row][col] = (double)(0);
        }
    }
    a->ptr.pp_double[0][0] = ae_exp(logsmax, _state);
    for(row=1; row<=n-2; row++)
    {
        a->ptr.pp_double[row][row] = (2*hqrnduniformi(&rng, 2, _state)-1)*ae_exp(hqrnduniformr(&rng, _state)*(logsmin-logsmax)+logsmax, _state);
    }
    a->ptr.pp_double[n-1][n-1] = ae_exp(logsmin, _state);

    /*
     * Multiply by a random orthogonal similarity transformation, which
     * preserves symmetry and the spectrum.
     */
    smatrixrndmultiply(a, n, _state);
    ae_frame_leave(_state);
}
23316 
23317 
23318 /*************************************************************************
23319 Generation of random NxN symmetric positive definite matrix with given
23320 condition number and norm2(A)=1
23321 
23322 INPUT PARAMETERS:
23323     N   -   matrix size
23324     C   -   condition number (in 2-norm)
23325 
23326 OUTPUT PARAMETERS:
23327     A   -   random SPD matrix with norm2(A)=1 and cond(A)=C
23328 
23329   -- ALGLIB routine --
23330      04.12.2009
23331      Bochkanov Sergey
23332 *************************************************************************/
spdmatrixrndcond(ae_int_t n,double c,ae_matrix * a,ae_state * _state)23333 void spdmatrixrndcond(ae_int_t n,
23334      double c,
23335      /* Real    */ ae_matrix* a,
23336      ae_state *_state)
23337 {
23338     ae_frame _frame_block;
23339     ae_int_t i;
23340     ae_int_t j;
23341     double l1;
23342     double l2;
23343     hqrndstate rs;
23344 
23345     ae_frame_make(_state, &_frame_block);
23346     memset(&rs, 0, sizeof(rs));
23347     ae_matrix_clear(a);
23348     _hqrndstate_init(&rs, _state, ae_true);
23349 
23350 
23351     /*
23352      * Special cases
23353      */
23354     if( n<=0||ae_fp_less(c,(double)(1)) )
23355     {
23356         ae_frame_leave(_state);
23357         return;
23358     }
23359     ae_matrix_set_length(a, n, n, _state);
23360     if( n==1 )
23361     {
23362         a->ptr.pp_double[0][0] = (double)(1);
23363         ae_frame_leave(_state);
23364         return;
23365     }
23366 
23367     /*
23368      * Prepare matrix
23369      */
23370     hqrndrandomize(&rs, _state);
23371     l1 = (double)(0);
23372     l2 = ae_log(1/c, _state);
23373     for(i=0; i<=n-1; i++)
23374     {
23375         for(j=0; j<=n-1; j++)
23376         {
23377             a->ptr.pp_double[i][j] = (double)(0);
23378         }
23379     }
23380     a->ptr.pp_double[0][0] = ae_exp(l1, _state);
23381     for(i=1; i<=n-2; i++)
23382     {
23383         a->ptr.pp_double[i][i] = ae_exp(hqrnduniformr(&rs, _state)*(l2-l1)+l1, _state);
23384     }
23385     a->ptr.pp_double[n-1][n-1] = ae_exp(l2, _state);
23386 
23387     /*
23388      * Multiply
23389      */
23390     smatrixrndmultiply(a, n, _state);
23391     ae_frame_leave(_state);
23392 }
23393 
23394 
23395 /*************************************************************************
23396 Generation of random NxN Hermitian matrix with given condition number  and
23397 norm2(A)=1
23398 
23399 INPUT PARAMETERS:
23400     N   -   matrix size
23401     C   -   condition number (in 2-norm)
23402 
23403 OUTPUT PARAMETERS:
23404     A   -   random matrix with norm2(A)=1 and cond(A)=C
23405 
23406   -- ALGLIB routine --
23407      04.12.2009
23408      Bochkanov Sergey
23409 *************************************************************************/
hmatrixrndcond(ae_int_t n,double c,ae_matrix * a,ae_state * _state)23410 void hmatrixrndcond(ae_int_t n,
23411      double c,
23412      /* Complex */ ae_matrix* a,
23413      ae_state *_state)
23414 {
23415     ae_frame _frame_block;
23416     ae_int_t i;
23417     ae_int_t j;
23418     double l1;
23419     double l2;
23420     hqrndstate rs;
23421 
23422     ae_frame_make(_state, &_frame_block);
23423     memset(&rs, 0, sizeof(rs));
23424     ae_matrix_clear(a);
23425     _hqrndstate_init(&rs, _state, ae_true);
23426 
23427     ae_assert(n>=1&&ae_fp_greater_eq(c,(double)(1)), "HMatrixRndCond: N<1 or C<1!", _state);
23428     ae_matrix_set_length(a, n, n, _state);
23429     if( n==1 )
23430     {
23431 
23432         /*
23433          * special case
23434          */
23435         a->ptr.pp_complex[0][0] = ae_complex_from_i(2*ae_randominteger(2, _state)-1);
23436         ae_frame_leave(_state);
23437         return;
23438     }
23439 
23440     /*
23441      * Prepare matrix
23442      */
23443     hqrndrandomize(&rs, _state);
23444     l1 = (double)(0);
23445     l2 = ae_log(1/c, _state);
23446     for(i=0; i<=n-1; i++)
23447     {
23448         for(j=0; j<=n-1; j++)
23449         {
23450             a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
23451         }
23452     }
23453     a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
23454     for(i=1; i<=n-2; i++)
23455     {
23456         a->ptr.pp_complex[i][i] = ae_complex_from_d((2*hqrnduniformi(&rs, 2, _state)-1)*ae_exp(hqrnduniformr(&rs, _state)*(l2-l1)+l1, _state));
23457     }
23458     a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
23459 
23460     /*
23461      * Multiply
23462      */
23463     hmatrixrndmultiply(a, n, _state);
23464 
23465     /*
23466      * post-process to ensure that matrix diagonal is real
23467      */
23468     for(i=0; i<=n-1; i++)
23469     {
23470         a->ptr.pp_complex[i][i].y = (double)(0);
23471     }
23472     ae_frame_leave(_state);
23473 }
23474 
23475 
23476 /*************************************************************************
23477 Generation of random NxN Hermitian positive definite matrix with given
23478 condition number and norm2(A)=1
23479 
23480 INPUT PARAMETERS:
23481     N   -   matrix size
23482     C   -   condition number (in 2-norm)
23483 
23484 OUTPUT PARAMETERS:
23485     A   -   random HPD matrix with norm2(A)=1 and cond(A)=C
23486 
23487   -- ALGLIB routine --
23488      04.12.2009
23489      Bochkanov Sergey
23490 *************************************************************************/
hpdmatrixrndcond(ae_int_t n,double c,ae_matrix * a,ae_state * _state)23491 void hpdmatrixrndcond(ae_int_t n,
23492      double c,
23493      /* Complex */ ae_matrix* a,
23494      ae_state *_state)
23495 {
23496     ae_frame _frame_block;
23497     ae_int_t i;
23498     ae_int_t j;
23499     double l1;
23500     double l2;
23501     hqrndstate rs;
23502 
23503     ae_frame_make(_state, &_frame_block);
23504     memset(&rs, 0, sizeof(rs));
23505     ae_matrix_clear(a);
23506     _hqrndstate_init(&rs, _state, ae_true);
23507 
23508 
23509     /*
23510      * Special cases
23511      */
23512     if( n<=0||ae_fp_less(c,(double)(1)) )
23513     {
23514         ae_frame_leave(_state);
23515         return;
23516     }
23517     ae_matrix_set_length(a, n, n, _state);
23518     if( n==1 )
23519     {
23520         a->ptr.pp_complex[0][0] = ae_complex_from_i(1);
23521         ae_frame_leave(_state);
23522         return;
23523     }
23524 
23525     /*
23526      * Prepare matrix
23527      */
23528     hqrndrandomize(&rs, _state);
23529     l1 = (double)(0);
23530     l2 = ae_log(1/c, _state);
23531     for(i=0; i<=n-1; i++)
23532     {
23533         for(j=0; j<=n-1; j++)
23534         {
23535             a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
23536         }
23537     }
23538     a->ptr.pp_complex[0][0] = ae_complex_from_d(ae_exp(l1, _state));
23539     for(i=1; i<=n-2; i++)
23540     {
23541         a->ptr.pp_complex[i][i] = ae_complex_from_d(ae_exp(hqrnduniformr(&rs, _state)*(l2-l1)+l1, _state));
23542     }
23543     a->ptr.pp_complex[n-1][n-1] = ae_complex_from_d(ae_exp(l2, _state));
23544 
23545     /*
23546      * Multiply
23547      */
23548     hmatrixrndmultiply(a, n, _state);
23549 
23550     /*
23551      * post-process to ensure that matrix diagonal is real
23552      */
23553     for(i=0; i<=n-1; i++)
23554     {
23555         a->ptr.pp_complex[i][i].y = (double)(0);
23556     }
23557     ae_frame_leave(_state);
23558 }
23559 
23560 
23561 /*************************************************************************
23562 Multiplication of MxN matrix by NxN random Haar distributed orthogonal matrix
23563 
23564 INPUT PARAMETERS:
23565     A   -   matrix, array[0..M-1, 0..N-1]
23566     M, N-   matrix size
23567 
23568 OUTPUT PARAMETERS:
23569     A   -   A*Q, where Q is random NxN orthogonal matrix
23570 
23571   -- ALGLIB routine --
23572      04.12.2009
23573      Bochkanov Sergey
23574 *************************************************************************/
void rmatrixrndorthogonalfromtheright(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    double tau;
    double lambdav;
    ae_int_t s;
    ae_int_t i;
    double u1;
    double u2;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);

    ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
    if( n==1 )
    {

        /*
         * Special case: a 1x1 orthogonal matrix is just +1 or -1,
         * so the single column is multiplied by a random sign.
         */
        tau = (double)(2*ae_randominteger(2, _state)-1);
        for(i=0; i<=m-1; i++)
        {
            a->ptr.pp_double[i][0] = a->ptr.pp_double[i][0]*tau;
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case.
     * First pass: apply a sequence of Householder-type reflections built
     * from random normal vectors (Stewart's construction of a Haar-
     * distributed orthogonal Q, applied implicitly to A from the right).
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routine.
     */
    ae_vector_set_length(&w, m, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: entries v[1..s] are drawn N(0,1) two
         * at a time; the outer do-loop retries in the (practically
         * impossible) event that v came out exactly zero.
         */
        do
        {
            i = 1;
            while(i<=s)
            {
                hqrndnormal2(&state, &u1, &u2, _state);
                v.ptr.p_double[i] = u1;
                if( i+1<=s )
                {
                    v.ptr.p_double[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
        }
        while(ae_fp_eq(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection to columns n-s..n-1 of all M rows.
         * v[1] is reset to 1 per the reflection-storage convention used
         * by applyreflectionfromtheright.
         */
        generatereflection(&v, s, &tau, _state);
        v.ptr.p_double[1] = (double)(1);
        applyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state);
    }

    /*
     * Second pass: multiply each column by an independent random sign
     * to complete the construction.
     */
    for(i=0; i<=n-1; i++)
    {
        tau = (double)(2*hqrnduniformi(&state, 2, _state)-1);
        ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,m-1), tau);
    }
    ae_frame_leave(_state);
}
23663 
23664 
23665 /*************************************************************************
23666 Multiplication of MxN matrix by MxM random Haar distributed orthogonal matrix
23667 
23668 INPUT PARAMETERS:
23669     A   -   matrix, array[0..M-1, 0..N-1]
23670     M, N-   matrix size
23671 
23672 OUTPUT PARAMETERS:
23673     A   -   Q*A, where Q is random MxM orthogonal matrix
23674 
23675   -- ALGLIB routine --
23676      04.12.2009
23677      Bochkanov Sergey
23678 *************************************************************************/
rmatrixrndorthogonalfromtheleft(ae_matrix * a,ae_int_t m,ae_int_t n,ae_state * _state)23679 void rmatrixrndorthogonalfromtheleft(/* Real    */ ae_matrix* a,
23680      ae_int_t m,
23681      ae_int_t n,
23682      ae_state *_state)
23683 {
23684     ae_frame _frame_block;
23685     double tau;
23686     double lambdav;
23687     ae_int_t s;
23688     ae_int_t i;
23689     ae_int_t j;
23690     double u1;
23691     double u2;
23692     ae_vector w;
23693     ae_vector v;
23694     hqrndstate state;
23695 
23696     ae_frame_make(_state, &_frame_block);
23697     memset(&w, 0, sizeof(w));
23698     memset(&v, 0, sizeof(v));
23699     memset(&state, 0, sizeof(state));
23700     ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
23701     ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
23702     _hqrndstate_init(&state, _state, ae_true);
23703 
23704     ae_assert(n>=1&&m>=1, "RMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
23705     if( m==1 )
23706     {
23707 
23708         /*
23709          * special case
23710          */
23711         tau = (double)(2*ae_randominteger(2, _state)-1);
23712         for(j=0; j<=n-1; j++)
23713         {
23714             a->ptr.pp_double[0][j] = a->ptr.pp_double[0][j]*tau;
23715         }
23716         ae_frame_leave(_state);
23717         return;
23718     }
23719 
23720     /*
23721      * General case.
23722      * First pass.
23723      */
23724     ae_vector_set_length(&w, n, _state);
23725     ae_vector_set_length(&v, m+1, _state);
23726     hqrndrandomize(&state, _state);
23727     for(s=2; s<=m; s++)
23728     {
23729 
23730         /*
23731          * Prepare random normal v
23732          */
23733         do
23734         {
23735             i = 1;
23736             while(i<=s)
23737             {
23738                 hqrndnormal2(&state, &u1, &u2, _state);
23739                 v.ptr.p_double[i] = u1;
23740                 if( i+1<=s )
23741                 {
23742                     v.ptr.p_double[i+1] = u2;
23743                 }
23744                 i = i+2;
23745             }
23746             lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
23747         }
23748         while(ae_fp_eq(lambdav,(double)(0)));
23749 
23750         /*
23751          * Prepare and apply reflection
23752          */
23753         generatereflection(&v, s, &tau, _state);
23754         v.ptr.p_double[1] = (double)(1);
23755         applyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state);
23756     }
23757 
23758     /*
23759      * Second pass.
23760      */
23761     for(i=0; i<=m-1; i++)
23762     {
23763         tau = (double)(2*hqrnduniformi(&state, 2, _state)-1);
23764         ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau);
23765     }
23766     ae_frame_leave(_state);
23767 }
23768 
23769 
23770 /*************************************************************************
23771 Multiplication of MxN complex matrix by NxN random Haar distributed
23772 complex orthogonal matrix
23773 
23774 INPUT PARAMETERS:
23775     A   -   matrix, array[0..M-1, 0..N-1]
23776     M, N-   matrix size
23777 
23778 OUTPUT PARAMETERS:
23779     A   -   A*Q, where Q is random NxN orthogonal matrix
23780 
23781   -- ALGLIB routine --
23782      04.12.2009
23783      Bochkanov Sergey
23784 *************************************************************************/
void cmatrixrndorthogonalfromtheright(/* Complex */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_complex lambdav;
    ae_complex tau;
    ae_int_t s;
    ae_int_t i;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);

    ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
    if( n==1 )
    {

        /*
         * Special case: a 1x1 unitary matrix is a random point on the
         * complex unit circle; scale the single column by it.
         */
        hqrndrandomize(&state, _state);
        hqrndunit2(&state, &tau.x, &tau.y, _state);
        for(i=0; i<=m-1; i++)
        {
            a->ptr.pp_complex[i][0] = ae_c_mul(a->ptr.pp_complex[i][0],tau);
        }
        ae_frame_leave(_state);
        return;
    }

    /*
     * General case.
     * First pass: apply a sequence of complex Householder-type
     * reflections built from random complex normal vectors (Haar-
     * distributed unitary Q applied implicitly from the right).
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routine.
     */
    ae_vector_set_length(&w, m, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: tau is reused as scratch holding one
         * complex N(0,1) sample per entry; the conjugated dot product
         * yields |v|^2, and the do-loop retries if v is exactly zero.
         */
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrndnormal2(&state, &tau.x, &tau.y, _state);
                v.ptr.p_complex[i] = tau;
            }
            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
        }
        while(ae_c_eq_d(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection to columns n-s..n-1 of all M rows.
         * v[1] is reset to 1 per the reflection-storage convention.
         */
        complexgeneratereflection(&v, s, &tau, _state);
        v.ptr.p_complex[1] = ae_complex_from_i(1);
        complexapplyreflectionfromtheright(a, tau, &v, 0, m-1, n-s, n-1, &w, _state);
    }

    /*
     * Second pass: multiply each column by an independent random
     * unit-modulus complex factor to complete the construction.
     */
    for(i=0; i<=n-1; i++)
    {
        hqrndunit2(&state, &tau.x, &tau.y, _state);
        ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,m-1), tau);
    }
    ae_frame_leave(_state);
}
23866 
23867 
23868 /*************************************************************************
23869 Multiplication of MxN complex matrix by MxM random Haar distributed
23870 complex orthogonal matrix
23871 
23872 INPUT PARAMETERS:
23873     A   -   matrix, array[0..M-1, 0..N-1]
23874     M, N-   matrix size
23875 
23876 OUTPUT PARAMETERS:
23877     A   -   Q*A, where Q is random MxM orthogonal matrix
23878 
23879   -- ALGLIB routine --
23880      04.12.2009
23881      Bochkanov Sergey
23882 *************************************************************************/
cmatrixrndorthogonalfromtheleft(ae_matrix * a,ae_int_t m,ae_int_t n,ae_state * _state)23883 void cmatrixrndorthogonalfromtheleft(/* Complex */ ae_matrix* a,
23884      ae_int_t m,
23885      ae_int_t n,
23886      ae_state *_state)
23887 {
23888     ae_frame _frame_block;
23889     ae_complex tau;
23890     ae_complex lambdav;
23891     ae_int_t s;
23892     ae_int_t i;
23893     ae_int_t j;
23894     ae_vector w;
23895     ae_vector v;
23896     hqrndstate state;
23897 
23898     ae_frame_make(_state, &_frame_block);
23899     memset(&w, 0, sizeof(w));
23900     memset(&v, 0, sizeof(v));
23901     memset(&state, 0, sizeof(state));
23902     ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
23903     ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
23904     _hqrndstate_init(&state, _state, ae_true);
23905 
23906     ae_assert(n>=1&&m>=1, "CMatrixRndOrthogonalFromTheRight: N<1 or M<1!", _state);
23907     if( m==1 )
23908     {
23909 
23910         /*
23911          * special case
23912          */
23913         hqrndrandomize(&state, _state);
23914         hqrndunit2(&state, &tau.x, &tau.y, _state);
23915         for(j=0; j<=n-1; j++)
23916         {
23917             a->ptr.pp_complex[0][j] = ae_c_mul(a->ptr.pp_complex[0][j],tau);
23918         }
23919         ae_frame_leave(_state);
23920         return;
23921     }
23922 
23923     /*
23924      * General case.
23925      * First pass.
23926      */
23927     ae_vector_set_length(&w, n, _state);
23928     ae_vector_set_length(&v, m+1, _state);
23929     hqrndrandomize(&state, _state);
23930     for(s=2; s<=m; s++)
23931     {
23932 
23933         /*
23934          * Prepare random normal v
23935          */
23936         do
23937         {
23938             for(i=1; i<=s; i++)
23939             {
23940                 hqrndnormal2(&state, &tau.x, &tau.y, _state);
23941                 v.ptr.p_complex[i] = tau;
23942             }
23943             lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
23944         }
23945         while(ae_c_eq_d(lambdav,(double)(0)));
23946 
23947         /*
23948          * Prepare and apply reflection
23949          */
23950         complexgeneratereflection(&v, s, &tau, _state);
23951         v.ptr.p_complex[1] = ae_complex_from_i(1);
23952         complexapplyreflectionfromtheleft(a, tau, &v, m-s, m-1, 0, n-1, &w, _state);
23953     }
23954 
23955     /*
23956      * Second pass.
23957      */
23958     for(i=0; i<=m-1; i++)
23959     {
23960         hqrndunit2(&state, &tau.x, &tau.y, _state);
23961         ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau);
23962     }
23963     ae_frame_leave(_state);
23964 }
23965 
23966 
23967 /*************************************************************************
23968 Symmetric multiplication of NxN matrix by random Haar distributed
23969 orthogonal  matrix
23970 
23971 INPUT PARAMETERS:
23972     A   -   matrix, array[0..N-1, 0..N-1]
23973     N   -   matrix size
23974 
23975 OUTPUT PARAMETERS:
23976     A   -   Q'*A*Q, where Q is random NxN orthogonal matrix
23977 
23978   -- ALGLIB routine --
23979      04.12.2009
23980      Bochkanov Sergey
23981 *************************************************************************/
void smatrixrndmultiply(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    double tau;
    double lambdav;
    ae_int_t s;
    ae_int_t i;
    double u1;
    double u2;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);


    /*
     * General case.
     * Each random reflection is applied from BOTH sides (right, then
     * left, with the same v and tau), which realizes the similarity
     * transform Q'*A*Q one reflection at a time.
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routines.
     */
    ae_vector_set_length(&w, n, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: entries v[1..s] are drawn N(0,1) two
         * at a time; the outer do-loop retries in the (practically
         * impossible) event that v came out exactly zero.
         */
        do
        {
            i = 1;
            while(i<=s)
            {
                hqrndnormal2(&state, &u1, &u2, _state);
                v.ptr.p_double[i] = u1;
                if( i+1<=s )
                {
                    v.ptr.p_double[i+1] = u2;
                }
                i = i+2;
            }
            lambdav = ae_v_dotproduct(&v.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,s));
        }
        while(ae_fp_eq(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection two-sidedly.
         * v[1] is reset to 1 per the reflection-storage convention.
         */
        generatereflection(&v, s, &tau, _state);
        v.ptr.p_double[1] = (double)(1);
        applyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state);
        applyreflectionfromtheleft(a, tau, &v, n-s, n-1, 0, n-1, &w, _state);
    }

    /*
     * Second pass: scale column i AND row i by the same random sign,
     * which keeps the transform a similarity (D*A*D with D=diag(+-1)).
     */
    for(i=0; i<=n-1; i++)
    {
        tau = (double)(2*hqrnduniformi(&state, 2, _state)-1);
        ae_v_muld(&a->ptr.pp_double[0][i], a->stride, ae_v_len(0,n-1), tau);
        ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), tau);
    }

    /*
     * Copy upper triangle to lower, making the result exactly
     * symmetric despite floating-point round-off.
     */
    for(i=0; i<=n-2; i++)
    {
        ae_v_move(&a->ptr.pp_double[i+1][i], a->stride, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1));
    }
    ae_frame_leave(_state);
}
24063 
24064 
24065 /*************************************************************************
24066 Hermitian multiplication of NxN matrix by random Haar distributed
24067 complex orthogonal matrix
24068 
24069 INPUT PARAMETERS:
24070     A   -   matrix, array[0..N-1, 0..N-1]
24071     N   -   matrix size
24072 
24073 OUTPUT PARAMETERS:
24074     A   -   Q^H*A*Q, where Q is random NxN orthogonal matrix
24075 
24076   -- ALGLIB routine --
24077      04.12.2009
24078      Bochkanov Sergey
24079 *************************************************************************/
void hmatrixrndmultiply(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_complex tau;
    ae_complex lambdav;
    ae_int_t s;
    ae_int_t i;
    ae_vector w;
    ae_vector v;
    hqrndstate state;

    ae_frame_make(_state, &_frame_block);
    memset(&w, 0, sizeof(w));
    memset(&v, 0, sizeof(v));
    memset(&state, 0, sizeof(state));
    ae_vector_init(&w, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_true);
    _hqrndstate_init(&state, _state, ae_true);


    /*
     * General case.
     * Each random reflection is applied from the right with tau and from
     * the left with conj(tau), realizing the similarity transform
     * Q^H*A*Q one reflection at a time.
     * NOTE: v is filled using 1-based indexing, hence length n+1;
     *       w is scratch storage for the reflection routines.
     */
    ae_vector_set_length(&w, n, _state);
    ae_vector_set_length(&v, n+1, _state);
    hqrndrandomize(&state, _state);
    for(s=2; s<=n; s++)
    {

        /*
         * Prepare random normal v: tau is reused as scratch holding one
         * complex N(0,1) sample per entry; the conjugated dot product
         * yields |v|^2, and the do-loop retries if v is exactly zero.
         */
        do
        {
            for(i=1; i<=s; i++)
            {
                hqrndnormal2(&state, &tau.x, &tau.y, _state);
                v.ptr.p_complex[i] = tau;
            }
            lambdav = ae_v_cdotproduct(&v.ptr.p_complex[1], 1, "N", &v.ptr.p_complex[1], 1, "Conj", ae_v_len(1,s));
        }
        while(ae_c_eq_d(lambdav,(double)(0)));

        /*
         * Prepare and apply reflection two-sidedly.
         * v[1] is reset to 1 per the reflection-storage convention.
         */
        complexgeneratereflection(&v, s, &tau, _state);
        v.ptr.p_complex[1] = ae_complex_from_i(1);
        complexapplyreflectionfromtheright(a, tau, &v, 0, n-1, n-s, n-1, &w, _state);
        complexapplyreflectionfromtheleft(a, ae_c_conj(tau, _state), &v, n-s, n-1, 0, n-1, &w, _state);
    }

    /*
     * Second pass: scale column i by a random unit-modulus factor and
     * row i by its conjugate, keeping the transform D^H*A*D unitary-
     * similar.
     */
    for(i=0; i<=n-1; i++)
    {
        hqrndunit2(&state, &tau.x, &tau.y, _state);
        ae_v_cmulc(&a->ptr.pp_complex[0][i], a->stride, ae_v_len(0,n-1), tau);
        tau = ae_c_conj(tau, _state);
        ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), tau);
    }

    /*
     * Change all values from lower triangle by complex-conjugate values
     * from upper one: first copy the upper triangle down, then negate
     * the imaginary parts below the diagonal, making the result exactly
     * Hermitian despite floating-point round-off.
     */
    for(i=0; i<=n-2; i++)
    {
        ae_v_cmove(&a->ptr.pp_complex[i+1][i], a->stride, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1));
    }
    for(s=0; s<=n-2; s++)
    {
        for(i=s+1; i<=n-1; i++)
        {
            a->ptr.pp_complex[i][s].y = -a->ptr.pp_complex[i][s].y;
        }
    }
    ae_frame_leave(_state);
}
24162 
24163 
24164 #endif
24165 #if defined(AE_COMPILE_SPARSE) || !defined(AE_PARTIAL_BUILD)
24166 
24167 
24168 /*************************************************************************
24169 This function creates sparse matrix in a Hash-Table format.
24170 
This function creates Hash-Table matrix, which can be  converted  to  CRS
24172 format after its initialization is over. Typical  usage  scenario  for  a
24173 sparse matrix is:
24174 1. creation in a Hash-Table format
24175 2. insertion of the matrix elements
24176 3. conversion to the CRS representation
24177 4. matrix is passed to some linear algebra algorithm
24178 
24179 Some  information  about  different matrix formats can be found below, in
24180 the "NOTES" section.
24181 
24182 INPUT PARAMETERS
24183     M           -   number of rows in a matrix, M>=1
24184     N           -   number of columns in a matrix, N>=1
24185     K           -   K>=0, expected number of non-zero elements in a matrix.
24186                     K can be inexact approximation, can be less than actual
24187                     number  of  elements  (table will grow when needed) or
24188                     even zero).
24189                     It is important to understand that although hash-table
24190                     may grow automatically, it is better to  provide  good
24191                     estimate of data size.
24192 
24193 OUTPUT PARAMETERS
24194     S           -   sparse M*N matrix in Hash-Table representation.
24195                     All elements of the matrix are zero.
24196 
24197 NOTE 1
24198 
24199 Hash-tables use memory inefficiently, and they have to keep  some  amount
24200 of the "spare memory" in order to have good performance. Hash  table  for
24201 matrix with K non-zero elements will  need  C*K*(8+2*sizeof(int))  bytes,
24202 where C is a small constant, about 1.5-2 in magnitude.
24203 
24204 CRS storage, from the other side, is  more  memory-efficient,  and  needs
24205 just K*(8+sizeof(int))+M*sizeof(int) bytes, where M is a number  of  rows
24206 in a matrix.
24207 
24208 When you convert from the Hash-Table to CRS  representation, all unneeded
24209 memory will be freed.
24210 
24211 NOTE 2
24212 
24213 Comments of SparseMatrix structure outline  information  about  different
24214 sparse storage formats. We recommend you to read them before starting  to
24215 use ALGLIB sparse matrices.
24216 
24217 NOTE 3
24218 
24219 This function completely  overwrites S with new sparse matrix. Previously
24220 allocated storage is NOT reused. If you  want  to reuse already allocated
24221 memory, call SparseCreateBuf function.
24222 
24223   -- ALGLIB PROJECT --
24224      Copyright 14.10.2011 by Bochkanov Sergey
24225 *************************************************************************/
void sparsecreate(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     sparsematrix* s,
     ae_state *_state)
{

    /* Discard any previously allocated storage in S ... */
    _sparsematrix_clear(s);

    /* ... then delegate to the buffered version, which performs the
     * actual hash-table initialization (and argument validation). */
    sparsecreatebuf(m, n, k, s, _state);
}
24237 
24238 
24239 /*************************************************************************
24240 This version of SparseCreate function creates sparse matrix in Hash-Table
24241 format, reusing previously allocated storage as much  as  possible.  Read
24242 comments for SparseCreate() for more information.
24243 
24244 INPUT PARAMETERS
24245     M           -   number of rows in a matrix, M>=1
24246     N           -   number of columns in a matrix, N>=1
24247     K           -   K>=0, expected number of non-zero elements in a matrix.
24248                     K can be inexact approximation, can be less than actual
24249                     number  of  elements  (table will grow when needed) or
24250                     even zero).
24251                     It is important to understand that although hash-table
24252                     may grow automatically, it is better to  provide  good
24253                     estimate of data size.
24254     S           -   SparseMatrix structure which MAY contain some  already
24255                     allocated storage.
24256 
24257 OUTPUT PARAMETERS
24258     S           -   sparse M*N matrix in Hash-Table representation.
24259                     All elements of the matrix are zero.
24260                     Previously allocated storage is reused, if  its  size
24261                     is compatible with expected number of non-zeros K.
24262 
24263   -- ALGLIB PROJECT --
24264      Copyright 14.01.2014 by Bochkanov Sergey
24265 *************************************************************************/
sparsecreatebuf(ae_int_t m,ae_int_t n,ae_int_t k,sparsematrix * s,ae_state * _state)24266 void sparsecreatebuf(ae_int_t m,
24267      ae_int_t n,
24268      ae_int_t k,
24269      sparsematrix* s,
24270      ae_state *_state)
24271 {
24272     ae_int_t i;
24273 
24274 
24275     ae_assert(m>0, "SparseCreateBuf: M<=0", _state);
24276     ae_assert(n>0, "SparseCreateBuf: N<=0", _state);
24277     ae_assert(k>=0, "SparseCreateBuf: K<0", _state);
24278 
24279     /*
24280      * Hash-table size is max(existing_size,requested_size)
24281      *
24282      * NOTE: it is important to use ALL available memory for hash table
24283      *       because it is impossible to efficiently reallocate table
24284      *       without temporary storage. So, if we want table with up to
24285      *       1.000.000 elements, we have to create such table from the
24286      *       very beginning. Otherwise, the very idea of memory reuse
24287      *       will be compromised.
24288      */
24289     s->tablesize = ae_round(k/sparse_desiredloadfactor+sparse_additional, _state);
24290     rvectorsetlengthatleast(&s->vals, s->tablesize, _state);
24291     s->tablesize = s->vals.cnt;
24292 
24293     /*
24294      * Initialize other fields
24295      */
24296     s->matrixtype = 0;
24297     s->m = m;
24298     s->n = n;
24299     s->nfree = s->tablesize;
24300     ivectorsetlengthatleast(&s->idx, 2*s->tablesize, _state);
24301     for(i=0; i<=s->tablesize-1; i++)
24302     {
24303         s->idx.ptr.p_int[2*i] = -1;
24304     }
24305 }
24306 
24307 
24308 /*************************************************************************
24309 This function creates sparse matrix in a CRS format (expert function for
24310 situations when you are running out of memory).
24311 
24312 This function creates CRS matrix. Typical usage scenario for a CRS matrix
24313 is:
24314 1. creation (you have to tell number of non-zero elements at each row  at
24315    this moment)
24316 2. insertion of the matrix elements (row by row, from left to right)
24317 3. matrix is passed to some linear algebra algorithm
24318 
24319 This function is a memory-efficient alternative to SparseCreate(), but it
24320 is more complex because it requires you to know in advance how large your
24321 matrix is. Some  information about  different matrix formats can be found
24322 in comments on SparseMatrix structure.  We recommend  you  to  read  them
24323 before starting to use ALGLIB sparse matrices..
24324 
24325 INPUT PARAMETERS
24326     M           -   number of rows in a matrix, M>=1
24327     N           -   number of columns in a matrix, N>=1
24328     NER         -   number of elements at each row, array[M], NER[I]>=0
24329 
24330 OUTPUT PARAMETERS
24331     S           -   sparse M*N matrix in CRS representation.
24332                     You have to fill ALL non-zero elements by calling
24333                     SparseSet() BEFORE you try to use this matrix.
24334 
24335 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
24336       Previously allocated storage is NOT reused. If you  want  to  reuse
24337       already allocated memory, call SparseCreateCRSBuf function.
24338 
24339   -- ALGLIB PROJECT --
24340      Copyright 14.10.2011 by Bochkanov Sergey
24341 *************************************************************************/
sparsecreatecrs(ae_int_t m,ae_int_t n,ae_vector * ner,sparsematrix * s,ae_state * _state)24342 void sparsecreatecrs(ae_int_t m,
24343      ae_int_t n,
24344      /* Integer */ ae_vector* ner,
24345      sparsematrix* s,
24346      ae_state *_state)
24347 {
24348     ae_int_t i;
24349 
24350     _sparsematrix_clear(s);
24351 
24352     ae_assert(m>0, "SparseCreateCRS: M<=0", _state);
24353     ae_assert(n>0, "SparseCreateCRS: N<=0", _state);
24354     ae_assert(ner->cnt>=m, "SparseCreateCRS: Length(NER)<M", _state);
24355     for(i=0; i<=m-1; i++)
24356     {
24357         ae_assert(ner->ptr.p_int[i]>=0, "SparseCreateCRS: NER[] contains negative elements", _state);
24358     }
24359     sparsecreatecrsbuf(m, n, ner, s, _state);
24360 }
24361 
24362 
24363 /*************************************************************************
24364 This function creates sparse matrix in a CRS format (expert function  for
24365 situations when you are running out  of  memory).  This  version  of  CRS
24366 matrix creation function may reuse memory already allocated in S.
24367 
24368 This function creates CRS matrix. Typical usage scenario for a CRS matrix
24369 is:
24370 1. creation (you have to tell number of non-zero elements at each row  at
24371    this moment)
24372 2. insertion of the matrix elements (row by row, from left to right)
24373 3. matrix is passed to some linear algebra algorithm
24374 
24375 This function is a memory-efficient alternative to SparseCreate(), but it
24376 is more complex because it requires you to know in advance how large your
24377 matrix is. Some  information about  different matrix formats can be found
24378 in comments on SparseMatrix structure.  We recommend  you  to  read  them
24379 before starting to use ALGLIB sparse matrices..
24380 
24381 INPUT PARAMETERS
24382     M           -   number of rows in a matrix, M>=1
24383     N           -   number of columns in a matrix, N>=1
24384     NER         -   number of elements at each row, array[M], NER[I]>=0
24385     S           -   sparse matrix structure with possibly preallocated
24386                     memory.
24387 
24388 OUTPUT PARAMETERS
24389     S           -   sparse M*N matrix in CRS representation.
24390                     You have to fill ALL non-zero elements by calling
24391                     SparseSet() BEFORE you try to use this matrix.
24392 
24393   -- ALGLIB PROJECT --
24394      Copyright 14.10.2011 by Bochkanov Sergey
24395 *************************************************************************/
sparsecreatecrsbuf(ae_int_t m,ae_int_t n,ae_vector * ner,sparsematrix * s,ae_state * _state)24396 void sparsecreatecrsbuf(ae_int_t m,
24397      ae_int_t n,
24398      /* Integer */ ae_vector* ner,
24399      sparsematrix* s,
24400      ae_state *_state)
24401 {
24402     ae_int_t i;
24403     ae_int_t noe;
24404 
24405 
24406     ae_assert(m>0, "SparseCreateCRSBuf: M<=0", _state);
24407     ae_assert(n>0, "SparseCreateCRSBuf: N<=0", _state);
24408     ae_assert(ner->cnt>=m, "SparseCreateCRSBuf: Length(NER)<M", _state);
24409     noe = 0;
24410     s->matrixtype = 1;
24411     s->ninitialized = 0;
24412     s->m = m;
24413     s->n = n;
24414     ivectorsetlengthatleast(&s->ridx, s->m+1, _state);
24415     s->ridx.ptr.p_int[0] = 0;
24416     for(i=0; i<=s->m-1; i++)
24417     {
24418         ae_assert(ner->ptr.p_int[i]>=0, "SparseCreateCRSBuf: NER[] contains negative elements", _state);
24419         noe = noe+ner->ptr.p_int[i];
24420         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i]+ner->ptr.p_int[i];
24421     }
24422     rvectorsetlengthatleast(&s->vals, noe, _state);
24423     ivectorsetlengthatleast(&s->idx, noe, _state);
24424     if( noe==0 )
24425     {
24426         sparseinitduidx(s, _state);
24427     }
24428 }
24429 
24430 
24431 /*************************************************************************
24432 This function creates sparse matrix in  a  SKS  format  (skyline  storage
24433 format). In most cases you do not need this function - CRS format  better
24434 suits most use cases.
24435 
24436 INPUT PARAMETERS
24437     M, N        -   number of rows(M) and columns (N) in a matrix:
24438                     * M=N (as for now, ALGLIB supports only square SKS)
24439                     * N>=1
24440                     * M>=1
24441     D           -   "bottom" bandwidths, array[M], D[I]>=0.
24442                     I-th element stores number of non-zeros at I-th  row,
24443                     below the diagonal (diagonal itself is not  included)
24444     U           -   "top" bandwidths, array[N], U[I]>=0.
24445                     I-th element stores number of non-zeros  at I-th row,
24446                     above the diagonal (diagonal itself  is not included)
24447 
24448 OUTPUT PARAMETERS
24449     S           -   sparse M*N matrix in SKS representation.
24450                     All elements are filled by zeros.
24451                     You may use sparseset() to change their values.
24452 
24453 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
24454       Previously allocated storage is NOT reused. If you  want  to  reuse
24455       already allocated memory, call SparseCreateSKSBuf function.
24456 
24457   -- ALGLIB PROJECT --
24458      Copyright 13.01.2014 by Bochkanov Sergey
24459 *************************************************************************/
sparsecreatesks(ae_int_t m,ae_int_t n,ae_vector * d,ae_vector * u,sparsematrix * s,ae_state * _state)24460 void sparsecreatesks(ae_int_t m,
24461      ae_int_t n,
24462      /* Integer */ ae_vector* d,
24463      /* Integer */ ae_vector* u,
24464      sparsematrix* s,
24465      ae_state *_state)
24466 {
24467     ae_int_t i;
24468 
24469     _sparsematrix_clear(s);
24470 
24471     ae_assert(m>0, "SparseCreateSKS: M<=0", _state);
24472     ae_assert(n>0, "SparseCreateSKS: N<=0", _state);
24473     ae_assert(m==n, "SparseCreateSKS: M<>N", _state);
24474     ae_assert(d->cnt>=m, "SparseCreateSKS: Length(D)<M", _state);
24475     ae_assert(u->cnt>=n, "SparseCreateSKS: Length(U)<N", _state);
24476     for(i=0; i<=m-1; i++)
24477     {
24478         ae_assert(d->ptr.p_int[i]>=0, "SparseCreateSKS: D[] contains negative elements", _state);
24479         ae_assert(d->ptr.p_int[i]<=i, "SparseCreateSKS: D[I]>I for some I", _state);
24480     }
24481     for(i=0; i<=n-1; i++)
24482     {
24483         ae_assert(u->ptr.p_int[i]>=0, "SparseCreateSKS: U[] contains negative elements", _state);
24484         ae_assert(u->ptr.p_int[i]<=i, "SparseCreateSKS: U[I]>I for some I", _state);
24485     }
24486     sparsecreatesksbuf(m, n, d, u, s, _state);
24487 }
24488 
24489 
24490 /*************************************************************************
24491 This is "buffered"  version  of  SparseCreateSKS()  which  reuses  memory
24492 previously allocated in S (of course, memory is reallocated if needed).
24493 
24494 This function creates sparse matrix in  a  SKS  format  (skyline  storage
24495 format). In most cases you do not need this function - CRS format  better
24496 suits most use cases.
24497 
24498 INPUT PARAMETERS
24499     M, N        -   number of rows(M) and columns (N) in a matrix:
24500                     * M=N (as for now, ALGLIB supports only square SKS)
24501                     * N>=1
24502                     * M>=1
24503     D           -   "bottom" bandwidths, array[M], 0<=D[I]<=I.
24504                     I-th element stores number of non-zeros at I-th row,
24505                     below the diagonal (diagonal itself is not included)
24506     U           -   "top" bandwidths, array[N], 0<=U[I]<=I.
24507                     I-th element stores number of non-zeros at I-th row,
24508                     above the diagonal (diagonal itself is not included)
24509 
24510 OUTPUT PARAMETERS
24511     S           -   sparse M*N matrix in SKS representation.
24512                     All elements are filled by zeros.
24513                     You may use sparseset() to change their values.
24514 
24515   -- ALGLIB PROJECT --
24516      Copyright 13.01.2014 by Bochkanov Sergey
24517 *************************************************************************/
sparsecreatesksbuf(ae_int_t m,ae_int_t n,ae_vector * d,ae_vector * u,sparsematrix * s,ae_state * _state)24518 void sparsecreatesksbuf(ae_int_t m,
24519      ae_int_t n,
24520      /* Integer */ ae_vector* d,
24521      /* Integer */ ae_vector* u,
24522      sparsematrix* s,
24523      ae_state *_state)
24524 {
24525     ae_int_t i;
24526     ae_int_t minmn;
24527     ae_int_t nz;
24528     ae_int_t mxd;
24529     ae_int_t mxu;
24530 
24531 
24532     ae_assert(m>0, "SparseCreateSKSBuf: M<=0", _state);
24533     ae_assert(n>0, "SparseCreateSKSBuf: N<=0", _state);
24534     ae_assert(m==n, "SparseCreateSKSBuf: M<>N", _state);
24535     ae_assert(d->cnt>=m, "SparseCreateSKSBuf: Length(D)<M", _state);
24536     ae_assert(u->cnt>=n, "SparseCreateSKSBuf: Length(U)<N", _state);
24537     for(i=0; i<=m-1; i++)
24538     {
24539         ae_assert(d->ptr.p_int[i]>=0, "SparseCreateSKSBuf: D[] contains negative elements", _state);
24540         ae_assert(d->ptr.p_int[i]<=i, "SparseCreateSKSBuf: D[I]>I for some I", _state);
24541     }
24542     for(i=0; i<=n-1; i++)
24543     {
24544         ae_assert(u->ptr.p_int[i]>=0, "SparseCreateSKSBuf: U[] contains negative elements", _state);
24545         ae_assert(u->ptr.p_int[i]<=i, "SparseCreateSKSBuf: U[I]>I for some I", _state);
24546     }
24547     minmn = ae_minint(m, n, _state);
24548     s->matrixtype = 2;
24549     s->ninitialized = 0;
24550     s->m = m;
24551     s->n = n;
24552     ivectorsetlengthatleast(&s->ridx, minmn+1, _state);
24553     s->ridx.ptr.p_int[0] = 0;
24554     nz = 0;
24555     for(i=0; i<=minmn-1; i++)
24556     {
24557         nz = nz+1+d->ptr.p_int[i]+u->ptr.p_int[i];
24558         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i]+1+d->ptr.p_int[i]+u->ptr.p_int[i];
24559     }
24560     rvectorsetlengthatleast(&s->vals, nz, _state);
24561     for(i=0; i<=nz-1; i++)
24562     {
24563         s->vals.ptr.p_double[i] = 0.0;
24564     }
24565     ivectorsetlengthatleast(&s->didx, m+1, _state);
24566     mxd = 0;
24567     for(i=0; i<=m-1; i++)
24568     {
24569         s->didx.ptr.p_int[i] = d->ptr.p_int[i];
24570         mxd = ae_maxint(mxd, d->ptr.p_int[i], _state);
24571     }
24572     s->didx.ptr.p_int[m] = mxd;
24573     ivectorsetlengthatleast(&s->uidx, n+1, _state);
24574     mxu = 0;
24575     for(i=0; i<=n-1; i++)
24576     {
24577         s->uidx.ptr.p_int[i] = u->ptr.p_int[i];
24578         mxu = ae_maxint(mxu, u->ptr.p_int[i], _state);
24579     }
24580     s->uidx.ptr.p_int[n] = mxu;
24581 }
24582 
24583 
24584 /*************************************************************************
24585 This function creates sparse matrix in  a  SKS  format  (skyline  storage
24586 format). Unlike more general  sparsecreatesks(),  this  function  creates
24587 sparse matrix with constant bandwidth.
24588 
24589 You may want to use this function instead of sparsecreatesks() when  your
24590 matrix has  constant  or  nearly-constant  bandwidth,  and  you  want  to
24591 simplify source code.
24592 
24593 INPUT PARAMETERS
24594     M, N        -   number of rows(M) and columns (N) in a matrix:
24595                     * M=N (as for now, ALGLIB supports only square SKS)
24596                     * N>=1
24597                     * M>=1
24598     BW          -   matrix bandwidth, BW>=0
24599 
24600 OUTPUT PARAMETERS
24601     S           -   sparse M*N matrix in SKS representation.
24602                     All elements are filled by zeros.
24603                     You may use sparseset() to  change  their values.
24604 
24605 NOTE: this function completely  overwrites  S  with  new  sparse  matrix.
24606       Previously allocated storage is NOT reused. If you  want  to  reuse
24607       already allocated memory, call sparsecreatesksbandbuf function.
24608 
24609   -- ALGLIB PROJECT --
24610      Copyright 25.12.2017 by Bochkanov Sergey
24611 *************************************************************************/
sparsecreatesksband(ae_int_t m,ae_int_t n,ae_int_t bw,sparsematrix * s,ae_state * _state)24612 void sparsecreatesksband(ae_int_t m,
24613      ae_int_t n,
24614      ae_int_t bw,
24615      sparsematrix* s,
24616      ae_state *_state)
24617 {
24618 
24619     _sparsematrix_clear(s);
24620 
24621     ae_assert(m>0, "SparseCreateSKSBand: M<=0", _state);
24622     ae_assert(n>0, "SparseCreateSKSBand: N<=0", _state);
24623     ae_assert(bw>=0, "SparseCreateSKSBand: BW<0", _state);
24624     ae_assert(m==n, "SparseCreateSKSBand: M!=N", _state);
24625     sparsecreatesksbandbuf(m, n, bw, s, _state);
24626 }
24627 
24628 
24629 /*************************************************************************
24630 This is "buffered" version  of  sparsecreatesksband() which reuses memory
24631 previously allocated in S (of course, memory is reallocated if needed).
24632 
24633 You may want to use this function instead  of  sparsecreatesksbuf()  when
24634 your matrix has  constant or nearly-constant  bandwidth,  and you want to
24635 simplify source code.
24636 
24637 INPUT PARAMETERS
24638     M, N        -   number of rows(M) and columns (N) in a matrix:
24639                     * M=N (as for now, ALGLIB supports only square SKS)
24640                     * N>=1
24641                     * M>=1
24642     BW          -   bandwidth, BW>=0
24643 
24644 OUTPUT PARAMETERS
24645     S           -   sparse M*N matrix in SKS representation.
24646                     All elements are filled by zeros.
24647                     You may use sparseset() to change their values.
24648 
24649   -- ALGLIB PROJECT --
24650      Copyright 13.01.2014 by Bochkanov Sergey
24651 *************************************************************************/
sparsecreatesksbandbuf(ae_int_t m,ae_int_t n,ae_int_t bw,sparsematrix * s,ae_state * _state)24652 void sparsecreatesksbandbuf(ae_int_t m,
24653      ae_int_t n,
24654      ae_int_t bw,
24655      sparsematrix* s,
24656      ae_state *_state)
24657 {
24658     ae_int_t i;
24659     ae_int_t minmn;
24660     ae_int_t nz;
24661     ae_int_t mxd;
24662     ae_int_t mxu;
24663     ae_int_t dui;
24664 
24665 
24666     ae_assert(m>0, "SparseCreateSKSBandBuf: M<=0", _state);
24667     ae_assert(n>0, "SparseCreateSKSBandBuf: N<=0", _state);
24668     ae_assert(m==n, "SparseCreateSKSBandBuf: M!=N", _state);
24669     ae_assert(bw>=0, "SparseCreateSKSBandBuf: BW<0", _state);
24670     minmn = ae_minint(m, n, _state);
24671     s->matrixtype = 2;
24672     s->ninitialized = 0;
24673     s->m = m;
24674     s->n = n;
24675     ivectorsetlengthatleast(&s->ridx, minmn+1, _state);
24676     s->ridx.ptr.p_int[0] = 0;
24677     nz = 0;
24678     for(i=0; i<=minmn-1; i++)
24679     {
24680         dui = ae_minint(i, bw, _state);
24681         nz = nz+1+2*dui;
24682         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i]+1+2*dui;
24683     }
24684     rvectorsetlengthatleast(&s->vals, nz, _state);
24685     for(i=0; i<=nz-1; i++)
24686     {
24687         s->vals.ptr.p_double[i] = 0.0;
24688     }
24689     ivectorsetlengthatleast(&s->didx, m+1, _state);
24690     mxd = 0;
24691     for(i=0; i<=m-1; i++)
24692     {
24693         dui = ae_minint(i, bw, _state);
24694         s->didx.ptr.p_int[i] = dui;
24695         mxd = ae_maxint(mxd, dui, _state);
24696     }
24697     s->didx.ptr.p_int[m] = mxd;
24698     ivectorsetlengthatleast(&s->uidx, n+1, _state);
24699     mxu = 0;
24700     for(i=0; i<=n-1; i++)
24701     {
24702         dui = ae_minint(i, bw, _state);
24703         s->uidx.ptr.p_int[i] = dui;
24704         mxu = ae_maxint(mxu, dui, _state);
24705     }
24706     s->uidx.ptr.p_int[n] = mxu;
24707 }
24708 
24709 
24710 /*************************************************************************
24711 This function copies S0 to S1.
24712 This function completely deallocates memory owned by S1 before creating a
24713 copy of S0. If you want to reuse memory, use SparseCopyBuf.
24714 
24715 NOTE:  this  function  does  not verify its arguments, it just copies all
24716 fields of the structure.
24717 
24718   -- ALGLIB PROJECT --
24719      Copyright 14.10.2011 by Bochkanov Sergey
24720 *************************************************************************/
sparsecopy(sparsematrix * s0,sparsematrix * s1,ae_state * _state)24721 void sparsecopy(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
24722 {
24723 
24724     _sparsematrix_clear(s1);
24725 
24726     sparsecopybuf(s0, s1, _state);
24727 }
24728 
24729 
24730 /*************************************************************************
24731 This function copies S0 to S1.
24732 Memory already allocated in S1 is reused as much as possible.
24733 
24734 NOTE:  this  function  does  not verify its arguments, it just copies all
24735 fields of the structure.
24736 
24737   -- ALGLIB PROJECT --
24738      Copyright 14.10.2011 by Bochkanov Sergey
24739 *************************************************************************/
sparsecopybuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)24740 void sparsecopybuf(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
24741 {
24742     ae_int_t l;
24743     ae_int_t i;
24744 
24745 
24746     s1->matrixtype = s0->matrixtype;
24747     s1->m = s0->m;
24748     s1->n = s0->n;
24749     s1->nfree = s0->nfree;
24750     s1->ninitialized = s0->ninitialized;
24751     s1->tablesize = s0->tablesize;
24752 
24753     /*
24754      * Initialization for arrays
24755      */
24756     l = s0->vals.cnt;
24757     rvectorsetlengthatleast(&s1->vals, l, _state);
24758     for(i=0; i<=l-1; i++)
24759     {
24760         s1->vals.ptr.p_double[i] = s0->vals.ptr.p_double[i];
24761     }
24762     l = s0->ridx.cnt;
24763     ivectorsetlengthatleast(&s1->ridx, l, _state);
24764     for(i=0; i<=l-1; i++)
24765     {
24766         s1->ridx.ptr.p_int[i] = s0->ridx.ptr.p_int[i];
24767     }
24768     l = s0->idx.cnt;
24769     ivectorsetlengthatleast(&s1->idx, l, _state);
24770     for(i=0; i<=l-1; i++)
24771     {
24772         s1->idx.ptr.p_int[i] = s0->idx.ptr.p_int[i];
24773     }
24774 
24775     /*
24776      * Initalization for CRS-parameters
24777      */
24778     l = s0->uidx.cnt;
24779     ivectorsetlengthatleast(&s1->uidx, l, _state);
24780     for(i=0; i<=l-1; i++)
24781     {
24782         s1->uidx.ptr.p_int[i] = s0->uidx.ptr.p_int[i];
24783     }
24784     l = s0->didx.cnt;
24785     ivectorsetlengthatleast(&s1->didx, l, _state);
24786     for(i=0; i<=l-1; i++)
24787     {
24788         s1->didx.ptr.p_int[i] = s0->didx.ptr.p_int[i];
24789     }
24790 }
24791 
24792 
24793 /*************************************************************************
24794 This function efficiently swaps contents of S0 and S1.
24795 
24796   -- ALGLIB PROJECT --
24797      Copyright 16.01.2014 by Bochkanov Sergey
24798 *************************************************************************/
sparseswap(sparsematrix * s0,sparsematrix * s1,ae_state * _state)24799 void sparseswap(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
24800 {
24801 
24802 
24803     swapi(&s1->matrixtype, &s0->matrixtype, _state);
24804     swapi(&s1->m, &s0->m, _state);
24805     swapi(&s1->n, &s0->n, _state);
24806     swapi(&s1->nfree, &s0->nfree, _state);
24807     swapi(&s1->ninitialized, &s0->ninitialized, _state);
24808     swapi(&s1->tablesize, &s0->tablesize, _state);
24809     ae_swap_vectors(&s1->vals, &s0->vals);
24810     ae_swap_vectors(&s1->ridx, &s0->ridx);
24811     ae_swap_vectors(&s1->idx, &s0->idx);
24812     ae_swap_vectors(&s1->uidx, &s0->uidx);
24813     ae_swap_vectors(&s1->didx, &s0->didx);
24814 }
24815 
24816 
/*************************************************************************
This function adds value to S[i,j] - element of the sparse matrix. Matrix
must be in a Hash-Table mode.

In case S[i,j] already exists in the table, V i added to  its  value.  In
case  S[i,j]  is  non-existent,  it  is  inserted  in  the  table.  Table
automatically grows when necessary.

INPUT PARAMETERS
    S           -   sparse M*N matrix in Hash-Table representation.
                    Exception will be thrown for CRS matrix.
    I           -   row index of the element to modify, 0<=I<M
    J           -   column index of the element to modify, 0<=J<N
    V           -   value to add, must be finite number

OUTPUT PARAMETERS
    S           -   modified matrix

NOTE 1:  when  S[i,j]  is exactly zero after modification, it is  deleted
from the table.

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
void sparseadd(sparsematrix* s,
     ae_int_t i,
     ae_int_t j,
     double v,
     ae_state *_state)
{
    ae_int_t hashcode;  /* current probe position in the hash table */
    ae_int_t tcode;     /* first "deleted" slot met while probing, -1 if none */
    ae_int_t k;         /* current table size (number of slots) */


    ae_assert(s->matrixtype==0, "SparseAdd: matrix must be in the Hash-Table mode to do this operation", _state);
    ae_assert(i>=0, "SparseAdd: I<0", _state);
    ae_assert(i<s->m, "SparseAdd: I>=M", _state);
    ae_assert(j>=0, "SparseAdd: J<0", _state);
    ae_assert(j<s->n, "SparseAdd: J>=N", _state);
    ae_assert(ae_isfinite(v, _state), "SparseAdd: V is not finite number", _state);

    /*
     * Adding exact zero changes nothing - return early
     */
    if( ae_fp_eq(v,(double)(0)) )
    {
        return;
    }

    /*
     * Grow the table first if its load factor exceeds the allowed
     * maximum (NFree counts never-used slots)
     */
    tcode = -1;
    k = s->tablesize;
    if( ae_fp_greater_eq((1-sparse_maxloadfactor)*k,(double)(s->nfree)) )
    {
        sparseresizematrix(s, _state);
        k = s->tablesize;
    }

    /*
     * Open addressing with linear probing. Idx[] stores (row,col) pairs
     * at positions 2*slot and 2*slot+1; row=-1 marks a never-used slot,
     * row=-2 marks a deleted slot (tombstone).
     */
    hashcode = sparse_hash(i, j, k, _state);
    for(;;)
    {
        if( s->idx.ptr.p_int[2*hashcode]==-1 )
        {
            /*
             * Never-used slot reached: (I,J) is not in the table.
             * Insert it, preferring an earlier tombstone if one was
             * seen; NFree decreases only when a never-used slot is
             * consumed (tombstone reuse does not reduce free space).
             */
            if( tcode!=-1 )
            {
                hashcode = tcode;
            }
            s->vals.ptr.p_double[hashcode] = v;
            s->idx.ptr.p_int[2*hashcode] = i;
            s->idx.ptr.p_int[2*hashcode+1] = j;
            if( tcode==-1 )
            {
                s->nfree = s->nfree-1;
            }
            return;
        }
        else
        {
            if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
            {
                /*
                 * Element found: accumulate V into it; an exactly-zero
                 * result deletes the element by turning its slot into
                 * a tombstone (see NOTE 1 above)
                 */
                s->vals.ptr.p_double[hashcode] = s->vals.ptr.p_double[hashcode]+v;
                if( ae_fp_eq(s->vals.ptr.p_double[hashcode],(double)(0)) )
                {
                    s->idx.ptr.p_int[2*hashcode] = -2;
                }
                return;
            }

            /*
             * Is it deleted element? Remember the first tombstone so it
             * can be reused for insertion if (I,J) turns out to be absent.
             */
            if( tcode==-1&&s->idx.ptr.p_int[2*hashcode]==-2 )
            {
                tcode = hashcode;
            }

            /*
             * Next step (wrap around the table end)
             */
            hashcode = (hashcode+1)%k;
        }
    }
}
24914 
24915 
/*************************************************************************
This function modifies S[i,j] - element of the sparse matrix.

For Hash-based storage format:
* this function can be called at any moment - during matrix initialization
  or later
* new value can be zero or non-zero.  In case new value of S[i,j] is zero,
  this element is deleted from the table.
* this  function  has  no  effect when called with zero V for non-existent
  element.

For CRS-bases storage format:
* this function can be called ONLY DURING MATRIX INITIALIZATION
* zero values are stored in the matrix similarly to non-zero ones
* elements must be initialized in correct order -  from top row to bottom,
  within row - from left to right.

For SKS storage:
* this function can be called at any moment - during matrix initialization
  or later
* zero values are stored in the matrix similarly to non-zero ones
* this function CAN NOT be called for non-existent (outside  of  the  band
  specified during SKS matrix creation) elements. Say, if you created  SKS
  matrix  with  bandwidth=2  and  tried to call sparseset(s,0,10,VAL),  an
  exception will be generated.

INPUT PARAMETERS
    S           -   sparse M*N matrix in Hash-Table, SKS or CRS format.
    I           -   row index of the element to modify, 0<=I<M
    J           -   column index of the element to modify, 0<=J<N
    V           -   value to set, must be finite number, can be zero

OUTPUT PARAMETERS
    S           -   modified matrix

  -- ALGLIB PROJECT --
     Copyright 14.10.2011 by Bochkanov Sergey
*************************************************************************/
void sparseset(sparsematrix* s,
     ae_int_t i,
     ae_int_t j,
     double v,
     ae_state *_state)
{
    ae_int_t hashcode;  /* current probe position in the hash table */
    ae_int_t tcode;     /* first "deleted" slot met while probing, -1 if none */
    ae_int_t k;         /* current table size (number of slots) */
    ae_bool b;          /* result of in-band rewrite attempt (SKS mode) */


    ae_assert((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2, "SparseSet: unsupported matrix storage format", _state);
    ae_assert(i>=0, "SparseSet: I<0", _state);
    ae_assert(i<s->m, "SparseSet: I>=M", _state);
    ae_assert(j>=0, "SparseSet: J<0", _state);
    ae_assert(j<s->n, "SparseSet: J>=N", _state);
    ae_assert(ae_isfinite(v, _state), "SparseSet: V is not finite number", _state);

    /*
     * Hash-table matrix: open addressing with linear probing, same slot
     * layout as in SparseAdd() - Idx[] holds (row,col) pairs at 2*slot
     * and 2*slot+1, row=-1 means never-used, row=-2 means deleted.
     */
    if( s->matrixtype==0 )
    {
        tcode = -1;
        k = s->tablesize;
        /* grow the table when load factor exceeds the allowed maximum */
        if( ae_fp_greater_eq((1-sparse_maxloadfactor)*k,(double)(s->nfree)) )
        {
            sparseresizematrix(s, _state);
            k = s->tablesize;
        }
        hashcode = sparse_hash(i, j, k, _state);
        for(;;)
        {
            if( s->idx.ptr.p_int[2*hashcode]==-1 )
            {
                /*
                 * Never-used slot reached: (I,J) absent. Insert only a
                 * non-zero V (storing zero for a non-existent element
                 * would be a no-op by contract), preferring an earlier
                 * tombstone; NFree decreases only when a never-used
                 * slot is consumed.
                 */
                if( ae_fp_neq(v,(double)(0)) )
                {
                    if( tcode!=-1 )
                    {
                        hashcode = tcode;
                    }
                    s->vals.ptr.p_double[hashcode] = v;
                    s->idx.ptr.p_int[2*hashcode] = i;
                    s->idx.ptr.p_int[2*hashcode+1] = j;
                    if( tcode==-1 )
                    {
                        s->nfree = s->nfree-1;
                    }
                }
                return;
            }
            else
            {
                if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
                {
                    /*
                     * Element found: zero V deletes it (slot becomes a
                     * tombstone), non-zero V overwrites the stored value
                     */
                    if( ae_fp_eq(v,(double)(0)) )
                    {
                        s->idx.ptr.p_int[2*hashcode] = -2;
                    }
                    else
                    {
                        s->vals.ptr.p_double[hashcode] = v;
                    }
                    return;
                }
                /* remember the first tombstone for possible reuse */
                if( tcode==-1&&s->idx.ptr.p_int[2*hashcode]==-2 )
                {
                    tcode = hashcode;
                }

                /*
                 * Next step (wrap around the table end)
                 */
                hashcode = (hashcode+1)%k;
            }
        }
    }

    /*
     * CRS matrix: elements are appended sequentially at position
     * NInitialized, so the asserts below enforce the top-to-bottom,
     * left-to-right fill order promised at creation time
     */
    if( s->matrixtype==1 )
    {
        ae_assert(s->ridx.ptr.p_int[i]<=s->ninitialized, "SparseSet: too few initialized elements at some row (you have promised more when called SparceCreateCRS)", _state);
        ae_assert(s->ridx.ptr.p_int[i+1]>s->ninitialized, "SparseSet: too many initialized elements at some row (you have promised less when called SparceCreateCRS)", _state);
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[i]||s->idx.ptr.p_int[s->ninitialized-1]<j, "SparseSet: incorrect column order (you must fill every row from left to right)", _state);
        s->vals.ptr.p_double[s->ninitialized] = v;
        s->idx.ptr.p_int[s->ninitialized] = j;
        s->ninitialized = s->ninitialized+1;

        /*
         * If matrix has been created then
         * initiale 'S.UIdx' and 'S.DIdx'
         */
        if( s->ninitialized==s->ridx.ptr.p_int[s->m] )
        {
            sparseinitduidx(s, _state);
        }
        return;
    }

    /*
     * SKS matrix: rewrite in place; SparseRewriteExisting() returns
     * False for elements outside of the band, which is an error here
     */
    if( s->matrixtype==2 )
    {
        b = sparserewriteexisting(s, i, j, v, _state);
        ae_assert(b, "SparseSet: an attempt to initialize out-of-band element of the SKS matrix", _state);
        return;
    }
}
25066 
25067 
25068 /*************************************************************************
25069 This function returns S[i,j] - element of the sparse matrix.  Matrix  can
25070 be in any mode (Hash-Table, CRS, SKS), but this function is less efficient
25071 for CRS matrices. Hash-Table and SKS matrices can find  element  in  O(1)
25072 time, while  CRS  matrices need O(log(RS)) time, where RS is an number of
25073 non-zero elements in a row.
25074 
25075 INPUT PARAMETERS
25076     S           -   sparse M*N matrix
25077     I           -   row index of the element to modify, 0<=I<M
25078     J           -   column index of the element to modify, 0<=J<N
25079 
25080 RESULT
25081     value of S[I,J] or zero (in case no element with such index is found)
25082 
25083   -- ALGLIB PROJECT --
25084      Copyright 14.10.2011 by Bochkanov Sergey
25085 *************************************************************************/
double sparseget(sparsematrix* s,
     ae_int_t i,
     ae_int_t j,
     ae_state *_state)
{
    ae_int_t hashcode;
    ae_int_t k;
    ae_int_t k0;
    ae_int_t k1;
    double result;


    ae_assert(i>=0, "SparseGet: I<0", _state);
    ae_assert(i<s->m, "SparseGet: I>=M", _state);
    ae_assert(j>=0, "SparseGet: J<0", _state);
    ae_assert(j<s->n, "SparseGet: J>=N", _state);
    result = 0.0;
    if( s->matrixtype==0 )
    {

        /*
         * Hash-based storage: open addressing with linear probing.
         * Idx[2*HashCode] holds the row index of the entry stored in that
         * slot (-1 marks a never-used slot, which terminates the probe
         * chain), Idx[2*HashCode+1] holds the column index, and the value
         * itself lives in Vals[HashCode].
         */
        result = (double)(0);
        k = s->tablesize;
        hashcode = sparse_hash(i, j, k, _state);
        for(;;)
        {
            if( s->idx.ptr.p_int[2*hashcode]==-1 )
            {
                /* Never-used slot reached: element is absent, return 0 */
                return result;
            }
            if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
            {
                result = s->vals.ptr.p_double[hashcode];
                return result;
            }
            hashcode = (hashcode+1)%k;
        }
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS: row I occupies Vals/Idx[RIdx[i]..RIdx[i+1]-1], with column
         * indices sorted in ascending order, so we can binary-search for
         * column J inside the row (O(log(RS)) as promised above).
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGet: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        k0 = s->ridx.ptr.p_int[i];
        k1 = s->ridx.ptr.p_int[i+1]-1;
        result = (double)(0);
        while(k0<=k1)
        {
            k = (k0+k1)/2;
            if( s->idx.ptr.p_int[k]==j )
            {
                result = s->vals.ptr.p_double[k];
                return result;
            }
            if( s->idx.ptr.p_int[k]<j )
            {
                k0 = k+1;
            }
            else
            {
                k1 = k-1;
            }
        }
        /* Column J not present within the skyline of row I: structural zero */
        return result;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS: skyline storage. DIdx[i] is the subdiagonal band width of
         * row I, UIdx[j] is the superdiagonal band width of column J;
         * elements outside the band are structural zeros.
         */
        ae_assert(s->m==s->n, "SparseGet: non-square SKS matrix not supported", _state);
        result = (double)(0);
        if( i==j )
        {

            /*
             * Return diagonal element
             */
            result = s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+s->didx.ptr.p_int[i]];
            return result;
        }
        if( j<i )
        {

            /*
             * Return subdiagonal element at I-th "skyline block"
             * (row block stores S[i,i-k..i] consecutively from RIdx[i])
             */
            k = s->didx.ptr.p_int[i];
            if( i-j<=k )
            {
                result = s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+k+j-i];
            }
        }
        else
        {

            /*
             * Return superdiagonal element at J-th "skyline block"
             * (the last K slots before RIdx[j+1] hold S[j-k..j-1, j])
             */
            k = s->uidx.ptr.p_int[j];
            if( j-i<=k )
            {
                result = s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)];
            }
            return result;
        }
        return result;
    }
    ae_assert(ae_false, "SparseGet: unexpected matrix type", _state);
    return result;
}
25202 
25203 
25204 /*************************************************************************
25205 This function checks whether S[i,j] is present in the sparse  matrix.  It
25206 returns True even for elements  that  are  numerically  zero  (but  still
25207 have place allocated for them).
25208 
25209 The matrix  can be in any mode (Hash-Table, CRS, SKS), but this  function
25210 is less efficient for CRS matrices. Hash-Table and SKS matrices can  find
25211 element in O(1) time, while  CRS  matrices need O(log(RS)) time, where RS
25212 is an number of non-zero elements in a row.
25213 
25214 INPUT PARAMETERS
25215     S           -   sparse M*N matrix
25216     I           -   row index of the element to modify, 0<=I<M
25217     J           -   column index of the element to modify, 0<=J<N
25218 
25219 RESULT
25220     whether S[I,J] is present in the data structure or not
25221 
25222   -- ALGLIB PROJECT --
25223      Copyright 14.10.2020 by Bochkanov Sergey
25224 *************************************************************************/
sparseexists(sparsematrix * s,ae_int_t i,ae_int_t j,ae_state * _state)25225 ae_bool sparseexists(sparsematrix* s,
25226      ae_int_t i,
25227      ae_int_t j,
25228      ae_state *_state)
25229 {
25230     ae_int_t hashcode;
25231     ae_int_t k;
25232     ae_int_t k0;
25233     ae_int_t k1;
25234     ae_bool result;
25235 
25236 
25237     ae_assert(i>=0, "SparseExists: I<0", _state);
25238     ae_assert(i<s->m, "SparseExists: I>=M", _state);
25239     ae_assert(j>=0, "SparseExists: J<0", _state);
25240     ae_assert(j<s->n, "SparseExists: J>=N", _state);
25241     result = ae_false;
25242     if( s->matrixtype==0 )
25243     {
25244 
25245         /*
25246          * Hash-based storage
25247          */
25248         k = s->tablesize;
25249         hashcode = sparse_hash(i, j, k, _state);
25250         for(;;)
25251         {
25252             if( s->idx.ptr.p_int[2*hashcode]==-1 )
25253             {
25254                 return result;
25255             }
25256             if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
25257             {
25258                 result = ae_true;
25259                 return result;
25260             }
25261             hashcode = (hashcode+1)%k;
25262         }
25263     }
25264     if( s->matrixtype==1 )
25265     {
25266 
25267         /*
25268          * CRS
25269          */
25270         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseExists: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25271         k0 = s->ridx.ptr.p_int[i];
25272         k1 = s->ridx.ptr.p_int[i+1]-1;
25273         while(k0<=k1)
25274         {
25275             k = (k0+k1)/2;
25276             if( s->idx.ptr.p_int[k]==j )
25277             {
25278                 result = ae_true;
25279                 return result;
25280             }
25281             if( s->idx.ptr.p_int[k]<j )
25282             {
25283                 k0 = k+1;
25284             }
25285             else
25286             {
25287                 k1 = k-1;
25288             }
25289         }
25290         return result;
25291     }
25292     if( s->matrixtype==2 )
25293     {
25294 
25295         /*
25296          * SKS
25297          */
25298         ae_assert(s->m==s->n, "SparseExists: non-square SKS matrix not supported", _state);
25299         if( i==j )
25300         {
25301 
25302             /*
25303              * Return diagonal element
25304              */
25305             result = ae_true;
25306             return result;
25307         }
25308         if( j<i )
25309         {
25310 
25311             /*
25312              * Return subdiagonal element at I-th "skyline block"
25313              */
25314             if( i-j<=s->didx.ptr.p_int[i] )
25315             {
25316                 result = ae_true;
25317             }
25318         }
25319         else
25320         {
25321 
25322             /*
25323              * Return superdiagonal element at J-th "skyline block"
25324              */
25325             if( j-i<=s->uidx.ptr.p_int[j] )
25326             {
25327                 result = ae_true;
25328             }
25329             return result;
25330         }
25331         return result;
25332     }
25333     ae_assert(ae_false, "SparseExists: unexpected matrix type", _state);
25334     return result;
25335 }
25336 
25337 
25338 /*************************************************************************
25339 This function returns I-th diagonal element of the sparse matrix.
25340 
25341 Matrix can be in any mode (Hash-Table or CRS storage), but this  function
25342 is most efficient for CRS matrices - it requires less than 50 CPU  cycles
25343 to extract diagonal element. For Hash-Table matrices we still  have  O(1)
25344 query time, but function is many times slower.
25345 
25346 INPUT PARAMETERS
25347     S           -   sparse M*N matrix in Hash-Table representation.
25348                     Exception will be thrown for CRS matrix.
25349     I           -   index of the element to modify, 0<=I<min(M,N)
25350 
25351 RESULT
25352     value of S[I,I] or zero (in case no element with such index is found)
25353 
25354   -- ALGLIB PROJECT --
25355      Copyright 14.10.2011 by Bochkanov Sergey
25356 *************************************************************************/
sparsegetdiagonal(sparsematrix * s,ae_int_t i,ae_state * _state)25357 double sparsegetdiagonal(sparsematrix* s, ae_int_t i, ae_state *_state)
25358 {
25359     double result;
25360 
25361 
25362     ae_assert(i>=0, "SparseGetDiagonal: I<0", _state);
25363     ae_assert(i<s->m, "SparseGetDiagonal: I>=M", _state);
25364     ae_assert(i<s->n, "SparseGetDiagonal: I>=N", _state);
25365     result = (double)(0);
25366     if( s->matrixtype==0 )
25367     {
25368         result = sparseget(s, i, i, _state);
25369         return result;
25370     }
25371     if( s->matrixtype==1 )
25372     {
25373         if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
25374         {
25375             result = s->vals.ptr.p_double[s->didx.ptr.p_int[i]];
25376         }
25377         return result;
25378     }
25379     if( s->matrixtype==2 )
25380     {
25381         ae_assert(s->m==s->n, "SparseGetDiagonal: non-square SKS matrix not supported", _state);
25382         result = s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+s->didx.ptr.p_int[i]];
25383         return result;
25384     }
25385     ae_assert(ae_false, "SparseGetDiagonal: unexpected matrix type", _state);
25386     return result;
25387 }
25388 
25389 
25390 /*************************************************************************
25391 This function calculates matrix-vector product  S*x.  Matrix  S  must  be
25392 stored in CRS or SKS format (exception will be thrown otherwise).
25393 
25394 INPUT PARAMETERS
25395     S           -   sparse M*N matrix in CRS or SKS format.
25396     X           -   array[N], input vector. For  performance  reasons  we
25397                     make only quick checks - we check that array size  is
25398                     at least N, but we do not check for NAN's or INF's.
25399     Y           -   output buffer, possibly preallocated. In case  buffer
25400                     size is too small to store  result,  this  buffer  is
25401                     automatically resized.
25402 
25403 OUTPUT PARAMETERS
25404     Y           -   array[M], S*x
25405 
25406 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25407 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25408 this function.
25409 
25410   -- ALGLIB PROJECT --
25411      Copyright 14.10.2011 by Bochkanov Sergey
25412 *************************************************************************/
void sparsemv(sparsematrix* s,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y,
     ae_state *_state)
{
    double tval;
    double v;
    double vv;
    ae_int_t i;
    ae_int_t j;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t lt1;
    ae_int_t rt1;
    ae_int_t n;
    ae_int_t m;
    ae_int_t d;
    ae_int_t u;
    ae_int_t ri;
    ae_int_t ri1;


    ae_assert(x->cnt>=s->n, "SparseMV: length(X)<N", _state);
    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    rvectorsetlengthatleast(y, s->m, _state);
    n = s->n;
    m = s->m;
    if( s->matrixtype==1 )
    {

        /*
         * CRS format.
         * Perform integrity check.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);

        /*
         * Try vendor kernels
         */
        if( sparsegemvcrsmkl(0, s->m, s->n, 1.0, &s->vals, &s->idx, &s->ridx, x, 0, 0.0, y, 0, _state) )
        {
            return;
        }

        /*
         * Our own implementation:
         * y[i] = dot(row I, x), with row I stored in Vals/Idx[RIdx[i]..RIdx[i+1]-1]
         */
        for(i=0; i<=m-1; i++)
        {
            tval = (double)(0);
            lt = s->ridx.ptr.p_int[i];
            rt = s->ridx.ptr.p_int[i+1]-1;
            for(j=lt; j<=rt; j++)
            {
                tval = tval+x->ptr.p_double[s->idx.ptr.p_int[j]]*s->vals.ptr.p_double[j];
            }
            y->ptr.p_double[i] = tval;
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format.
         * For each index I: D is the subdiagonal band width of row I,
         * U is the superdiagonal band width of column I. Row block I
         * occupies Vals[RIdx[i]..RIdx[i+1]-1]: first D subdiagonal
         * elements, then the diagonal at offset D, then U superdiagonal
         * elements of column I at the end of the block.
         */
        ae_assert(s->m==s->n, "SparseMV: non-square SKS matrices are not supported", _state);
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            /* Diagonal contribution */
            v = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
            if( d>0 )
            {
                /* Lower band of row I: y[i] += dot(S[i,i-d..i-1], x[i-d..i-1]) */
                lt = ri;
                rt = ri+d-1;
                lt1 = i-d;
                rt1 = i-1;
                vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
                v = v+vv;
            }
            y->ptr.p_double[i] = v;
            if( u>0 )
            {
                /* Upper band of column I: scatter x[i]*S[i-u..i-1,i] into y[i-u..i-1] */
                lt = ri1-u;
                rt = ri1-1;
                lt1 = i-u;
                rt1 = i-1;
                v = x->ptr.p_double[i];
                ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
            }
        }
        return;
    }
}
25510 
25511 
25512 /*************************************************************************
25513 This function calculates matrix-vector product  S^T*x. Matrix S  must  be
25514 stored in CRS or SKS format (exception will be thrown otherwise).
25515 
25516 INPUT PARAMETERS
25517     S           -   sparse M*N matrix in CRS or SKS format.
25518     X           -   array[M], input vector. For  performance  reasons  we
25519                     make only quick checks - we check that array size  is
25520                     at least M, but we do not check for NAN's or INF's.
25521     Y           -   output buffer, possibly preallocated. In case  buffer
25522                     size is too small to store  result,  this  buffer  is
25523                     automatically resized.
25524 
25525 OUTPUT PARAMETERS
25526     Y           -   array[N], S^T*x
25527 
25528 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25529 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25530 this function.
25531 
25532   -- ALGLIB PROJECT --
25533      Copyright 14.10.2011 by Bochkanov Sergey
25534 *************************************************************************/
sparsemtv(sparsematrix * s,ae_vector * x,ae_vector * y,ae_state * _state)25535 void sparsemtv(sparsematrix* s,
25536      /* Real    */ ae_vector* x,
25537      /* Real    */ ae_vector* y,
25538      ae_state *_state)
25539 {
25540     ae_int_t i;
25541     ae_int_t j;
25542     ae_int_t lt;
25543     ae_int_t rt;
25544     ae_int_t ct;
25545     ae_int_t lt1;
25546     ae_int_t rt1;
25547     double v;
25548     double vv;
25549     ae_int_t n;
25550     ae_int_t m;
25551     ae_int_t ri;
25552     ae_int_t ri1;
25553     ae_int_t d;
25554     ae_int_t u;
25555 
25556 
25557     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMTV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
25558     ae_assert(x->cnt>=s->m, "SparseMTV: Length(X)<M", _state);
25559     n = s->n;
25560     m = s->m;
25561     rvectorsetlengthatleast(y, n, _state);
25562     for(i=0; i<=n-1; i++)
25563     {
25564         y->ptr.p_double[i] = (double)(0);
25565     }
25566     if( s->matrixtype==1 )
25567     {
25568 
25569         /*
25570          * CRS format
25571          * Perform integrity check.
25572          */
25573         ae_assert(s->ninitialized==s->ridx.ptr.p_int[m], "SparseMTV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25574 
25575         /*
25576          * Try vendor kernels
25577          */
25578         if( sparsegemvcrsmkl(1, s->m, s->n, 1.0, &s->vals, &s->idx, &s->ridx, x, 0, 0.0, y, 0, _state) )
25579         {
25580             return;
25581         }
25582 
25583         /*
25584          * Our own implementation
25585          */
25586         for(i=0; i<=m-1; i++)
25587         {
25588             lt = s->ridx.ptr.p_int[i];
25589             rt = s->ridx.ptr.p_int[i+1];
25590             v = x->ptr.p_double[i];
25591             for(j=lt; j<=rt-1; j++)
25592             {
25593                 ct = s->idx.ptr.p_int[j];
25594                 y->ptr.p_double[ct] = y->ptr.p_double[ct]+v*s->vals.ptr.p_double[j];
25595             }
25596         }
25597         return;
25598     }
25599     if( s->matrixtype==2 )
25600     {
25601 
25602         /*
25603          * SKS format
25604          */
25605         ae_assert(s->m==s->n, "SparseMV: non-square SKS matrices are not supported", _state);
25606         for(i=0; i<=n-1; i++)
25607         {
25608             ri = s->ridx.ptr.p_int[i];
25609             ri1 = s->ridx.ptr.p_int[i+1];
25610             d = s->didx.ptr.p_int[i];
25611             u = s->uidx.ptr.p_int[i];
25612             if( d>0 )
25613             {
25614                 lt = ri;
25615                 rt = ri+d-1;
25616                 lt1 = i-d;
25617                 rt1 = i-1;
25618                 v = x->ptr.p_double[i];
25619                 ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
25620             }
25621             v = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
25622             if( u>0 )
25623             {
25624                 lt = ri1-u;
25625                 rt = ri1-1;
25626                 lt1 = i-u;
25627                 rt1 = i-1;
25628                 vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
25629                 v = v+vv;
25630             }
25631             y->ptr.p_double[i] = v;
25632         }
25633         return;
25634     }
25635 }
25636 
25637 
25638 /*************************************************************************
25639 This function calculates generalized sparse matrix-vector product
25640 
25641     y := alpha*op(S)*x + beta*y
25642 
25643 Matrix S must be stored in CRS or SKS format (exception  will  be  thrown
25644 otherwise). op(S) can be either S or S^T.
25645 
25646 NOTE: this  function  expects  Y  to  be  large enough to store result. No
25647       automatic preallocation happens for smaller arrays.
25648 
25649 INPUT PARAMETERS
25650     S           -   sparse matrix in CRS or SKS format.
25651     Alpha       -   source coefficient
25652     OpS         -   operation type:
25653                     * OpS=0     =>  op(S) = S
25654                     * OpS=1     =>  op(S) = S^T
25655     X           -   input vector, must have at least Cols(op(S))+IX elements
25656     IX          -   subvector offset
25657     Beta        -   destination coefficient
25658     Y           -   preallocated output array, must have at least Rows(op(S))+IY elements
25659     IY          -   subvector offset
25660 
25661 OUTPUT PARAMETERS
25662     Y           -   elements [IY...IY+Rows(op(S))-1] are replaced by result,
25663                     other elements are not modified
25664 
25665 HANDLING OF SPECIAL CASES:
25666 * below M=Rows(op(S)) and N=Cols(op(S)). Although current  ALGLIB  version
25667   does not allow you to  create  zero-sized  sparse  matrices,  internally
25668   ALGLIB  can  deal  with  such matrices. So, comments for M or N equal to
25669   zero are for internal use only.
25670 * if M=0, then subroutine does nothing. It does not even touch arrays.
25671 * if N=0 or Alpha=0.0, then:
25672   * if Beta=0, then Y is filled by zeros. S and X are  not  referenced  at
25673     all. Initial values of Y are ignored (we do not  multiply  Y by  zero,
25674     we just rewrite it by zeros)
25675   * if Beta<>0, then Y is replaced by Beta*Y
25676 * if M>0, N>0, Alpha<>0, but  Beta=0, then  Y is replaced by alpha*op(S)*x
25677   initial state of Y  is ignored (rewritten without initial multiplication
25678   by zeros).
25679 
25680 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25681 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25682 this function.
25683 
25684   -- ALGLIB PROJECT --
25685      Copyright 10.12.2019 by Bochkanov Sergey
25686 *************************************************************************/
sparsegemv(sparsematrix * s,double alpha,ae_int_t ops,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)25687 void sparsegemv(sparsematrix* s,
25688      double alpha,
25689      ae_int_t ops,
25690      /* Real    */ ae_vector* x,
25691      ae_int_t ix,
25692      double beta,
25693      /* Real    */ ae_vector* y,
25694      ae_int_t iy,
25695      ae_state *_state)
25696 {
25697     ae_int_t opm;
25698     ae_int_t opn;
25699     ae_int_t rawm;
25700     ae_int_t rawn;
25701     ae_int_t i;
25702     ae_int_t j;
25703     double tval;
25704     ae_int_t lt;
25705     ae_int_t rt;
25706     ae_int_t ct;
25707     ae_int_t d;
25708     ae_int_t u;
25709     ae_int_t ri;
25710     ae_int_t ri1;
25711     double v;
25712     double vv;
25713     ae_int_t lt1;
25714     ae_int_t rt1;
25715 
25716 
25717     ae_assert(ops==0||ops==1, "SparseGEMV: incorrect OpS", _state);
25718     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseGEMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
25719     if( ops==0 )
25720     {
25721         opm = s->m;
25722         opn = s->n;
25723     }
25724     else
25725     {
25726         opm = s->n;
25727         opn = s->m;
25728     }
25729     ae_assert(opm>=0&&opn>=0, "SparseGEMV: op(S) has negative size", _state);
25730     ae_assert(opn==0||x->cnt+ix>=opn, "SparseGEMV: X is too short", _state);
25731     ae_assert(opm==0||y->cnt+iy>=opm, "SparseGEMV: X is too short", _state);
25732     rawm = s->m;
25733     rawn = s->n;
25734 
25735     /*
25736      * Quick exit strategies
25737      */
25738     if( opm==0 )
25739     {
25740         return;
25741     }
25742     if( ae_fp_neq(beta,(double)(0)) )
25743     {
25744         for(i=0; i<=opm-1; i++)
25745         {
25746             y->ptr.p_double[iy+i] = beta*y->ptr.p_double[iy+i];
25747         }
25748     }
25749     else
25750     {
25751         for(i=0; i<=opm-1; i++)
25752         {
25753             y->ptr.p_double[iy+i] = 0.0;
25754         }
25755     }
25756     if( opn==0||ae_fp_eq(alpha,(double)(0)) )
25757     {
25758         return;
25759     }
25760 
25761     /*
25762      * Now we have OpM>=1, OpN>=1, Alpha<>0
25763      */
25764     if( ops==0 )
25765     {
25766 
25767         /*
25768          * Compute generalized product y := alpha*S*x + beta*y
25769          * (with "beta*y" part already computed).
25770          */
25771         if( s->matrixtype==1 )
25772         {
25773 
25774             /*
25775              * CRS format.
25776              * Perform integrity check.
25777              */
25778             ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGEMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25779 
25780             /*
25781              * Try vendor kernels
25782              */
25783             if( sparsegemvcrsmkl(0, s->m, s->n, alpha, &s->vals, &s->idx, &s->ridx, x, ix, 1.0, y, iy, _state) )
25784             {
25785                 return;
25786             }
25787 
25788             /*
25789              * Our own implementation
25790              */
25791             for(i=0; i<=rawm-1; i++)
25792             {
25793                 tval = (double)(0);
25794                 lt = s->ridx.ptr.p_int[i];
25795                 rt = s->ridx.ptr.p_int[i+1]-1;
25796                 for(j=lt; j<=rt; j++)
25797                 {
25798                     tval = tval+x->ptr.p_double[s->idx.ptr.p_int[j]+ix]*s->vals.ptr.p_double[j];
25799                 }
25800                 y->ptr.p_double[i+iy] = alpha*tval+y->ptr.p_double[i+iy];
25801             }
25802             return;
25803         }
25804         if( s->matrixtype==2 )
25805         {
25806 
25807             /*
25808              * SKS format
25809              */
25810             ae_assert(s->m==s->n, "SparseMV: non-square SKS matrices are not supported", _state);
25811             for(i=0; i<=rawn-1; i++)
25812             {
25813                 ri = s->ridx.ptr.p_int[i];
25814                 ri1 = s->ridx.ptr.p_int[i+1];
25815                 d = s->didx.ptr.p_int[i];
25816                 u = s->uidx.ptr.p_int[i];
25817                 v = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i+ix];
25818                 if( d>0 )
25819                 {
25820                     lt = ri;
25821                     rt = ri+d-1;
25822                     lt1 = i-d+ix;
25823                     rt1 = i-1+ix;
25824                     vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
25825                     v = v+vv;
25826                 }
25827                 y->ptr.p_double[i+iy] = alpha*v+y->ptr.p_double[i+iy];
25828                 if( u>0 )
25829                 {
25830                     lt = ri1-u;
25831                     rt = ri1-1;
25832                     lt1 = i-u+iy;
25833                     rt1 = i-1+iy;
25834                     v = alpha*x->ptr.p_double[i+ix];
25835                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
25836                 }
25837             }
25838             return;
25839         }
25840     }
25841     else
25842     {
25843 
25844         /*
25845          * Compute generalized product y := alpha*S^T*x + beta*y
25846          * (with "beta*y" part already computed).
25847          */
25848         if( s->matrixtype==1 )
25849         {
25850 
25851             /*
25852              * CRS format
25853              * Perform integrity check.
25854              */
25855             ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGEMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
25856 
25857             /*
25858              * Try vendor kernels
25859              */
25860             if( sparsegemvcrsmkl(1, s->m, s->n, alpha, &s->vals, &s->idx, &s->ridx, x, ix, 1.0, y, iy, _state) )
25861             {
25862                 return;
25863             }
25864 
25865             /*
25866              * Our own implementation
25867              */
25868             for(i=0; i<=rawm-1; i++)
25869             {
25870                 lt = s->ridx.ptr.p_int[i];
25871                 rt = s->ridx.ptr.p_int[i+1];
25872                 v = alpha*x->ptr.p_double[i+ix];
25873                 for(j=lt; j<=rt-1; j++)
25874                 {
25875                     ct = s->idx.ptr.p_int[j]+iy;
25876                     y->ptr.p_double[ct] = y->ptr.p_double[ct]+v*s->vals.ptr.p_double[j];
25877                 }
25878             }
25879             return;
25880         }
25881         if( s->matrixtype==2 )
25882         {
25883 
25884             /*
25885              * SKS format
25886              */
25887             ae_assert(s->m==s->n, "SparseGEMV: non-square SKS matrices are not supported", _state);
25888             for(i=0; i<=rawn-1; i++)
25889             {
25890                 ri = s->ridx.ptr.p_int[i];
25891                 ri1 = s->ridx.ptr.p_int[i+1];
25892                 d = s->didx.ptr.p_int[i];
25893                 u = s->uidx.ptr.p_int[i];
25894                 if( d>0 )
25895                 {
25896                     lt = ri;
25897                     rt = ri+d-1;
25898                     lt1 = i-d+iy;
25899                     rt1 = i-1+iy;
25900                     v = alpha*x->ptr.p_double[i+ix];
25901                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
25902                 }
25903                 v = alpha*s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i+ix];
25904                 if( u>0 )
25905                 {
25906                     lt = ri1-u;
25907                     rt = ri1-1;
25908                     lt1 = i-u+ix;
25909                     rt1 = i-1+ix;
25910                     vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
25911                     v = v+alpha*vv;
25912                 }
25913                 y->ptr.p_double[i+iy] = v+y->ptr.p_double[i+iy];
25914             }
25915             return;
25916         }
25917     }
25918 }
25919 
25920 
25921 /*************************************************************************
25922 This function simultaneously calculates two matrix-vector products:
25923     S*x and S^T*x.
25924 S must be square (non-rectangular) matrix stored in  CRS  or  SKS  format
25925 (exception will be thrown otherwise).
25926 
25927 INPUT PARAMETERS
25928     S           -   sparse N*N matrix in CRS or SKS format.
25929     X           -   array[N], input vector. For  performance  reasons  we
25930                     make only quick checks - we check that array size  is
25931                     at least N, but we do not check for NAN's or INF's.
25932     Y0          -   output buffer, possibly preallocated. In case  buffer
25933                     size is too small to store  result,  this  buffer  is
25934                     automatically resized.
25935     Y1          -   output buffer, possibly preallocated. In case  buffer
25936                     size is too small to store  result,  this  buffer  is
25937                     automatically resized.
25938 
25939 OUTPUT PARAMETERS
25940     Y0          -   array[N], S*x
25941     Y1          -   array[N], S^T*x
25942 
25943 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
25944 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
25945 this function.
25946 
25947   -- ALGLIB PROJECT --
25948      Copyright 14.10.2011 by Bochkanov Sergey
25949 *************************************************************************/
void sparsemv2(sparsematrix* s,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y0,
     /* Real    */ ae_vector* y1,
     ae_state *_state)
{
    ae_int_t l;
    double tval;
    ae_int_t i;
    ae_int_t j;
    double vx;
    double vs;
    double v;
    double vv;
    double vd0;
    double vd1;
    ae_int_t vi;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t n;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t d;
    ae_int_t u;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t lt1;
    ae_int_t rt1;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMV2: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(s->m==s->n, "SparseMV2: matrix is non-square", _state);
    l = x->cnt;
    ae_assert(l>=s->n, "SparseMV2: Length(X)<N", _state);
    n = s->n;

    /*
     * Output buffers are sized from Length(X) (which is >=N) and the first
     * N entries are cleared; both products are then accumulated in place.
     */
    rvectorsetlengthatleast(y0, l, _state);
    rvectorsetlengthatleast(y1, l, _state);
    for(i=0; i<=n-1; i++)
    {
        y0->ptr.p_double[i] = (double)(0);
        y1->ptr.p_double[i] = (double)(0);
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format: a single pass over the rows produces both products.
         * Row I contributes a gather (dot product) to Y0[I]=S*x, while
         * X[I]*S[I,*] is scattered into Y1 to build S^T*x.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseMV2: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        for(i=0; i<=s->m-1; i++)
        {
            tval = (double)(0);
            vx = x->ptr.p_double[i];
            j0 = s->ridx.ptr.p_int[i];      /* first element of row I      */
            j1 = s->ridx.ptr.p_int[i+1]-1;  /* last element of row I       */
            for(j=j0; j<=j1; j++)
            {
                vi = s->idx.ptr.p_int[j];   /* column index of element J   */
                vs = s->vals.ptr.p_double[j];
                tval = tval+x->ptr.p_double[vi]*vs;                 /* gather for Y0[I]  */
                y1->ptr.p_double[vi] = y1->ptr.p_double[vi]+vx*vs;  /* scatter into Y1   */
            }
            y0->ptr.p_double[i] = tval;
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format. As used below, for row/column I:
         * * Vals[RI..RI+D-1]   - subdiagonal part of row I,    S[I,I-D..I-1]
         * * Vals[RI+D]         - diagonal element              S[I,I]
         * * Vals[RI1-U..RI1-1] - superdiagonal part of col I,  S[I-U..I-1,I]
         */
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];

            /*
             * The diagonal element contributes identically to both products.
             */
            vd0 = s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
            vd1 = vd0;
            if( d>0 )
            {

                /*
                 * Subdiagonal part of row I: scattered into Y1 (it forms a
                 * column of S^T) and folded into Y0[I] via a dot product.
                 */
                lt = ri;
                rt = ri+d-1;
                lt1 = i-d;
                rt1 = i-1;
                v = x->ptr.p_double[i];
                ae_v_addd(&y1->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
                vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
                vd0 = vd0+vv;
            }
            if( u>0 )
            {

                /*
                 * Superdiagonal part of column I: scattered into Y0 (it forms
                 * a row of S) and folded into Y1[I] via a dot product.
                 */
                lt = ri1-u;
                rt = ri1-1;
                lt1 = i-u;
                rt1 = i-1;
                v = x->ptr.p_double[i];
                ae_v_addd(&y0->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
                vv = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
                vd1 = vd1+vv;
            }
            y0->ptr.p_double[i] = vd0;
            y1->ptr.p_double[i] = vd1;
        }
        return;
    }
}
26058 
26059 
26060 /*************************************************************************
26061 This function calculates matrix-vector product  S*x, when S is  symmetric
26062 matrix. Matrix S  must be stored in CRS or SKS format  (exception will be
26063 thrown otherwise).
26064 
26065 INPUT PARAMETERS
26066     S           -   sparse M*M matrix in CRS or SKS format.
26067     IsUpper     -   whether upper or lower triangle of S is given:
26068                     * if upper triangle is given,  only   S[i,j] for j>=i
26069                       are used, and lower triangle is ignored (it can  be
26070                       empty - these elements are not referenced at all).
26071                     * if lower triangle is given,  only   S[i,j] for j<=i
26072                       are used, and upper triangle is ignored.
26073     X           -   array[N], input vector. For  performance  reasons  we
26074                     make only quick checks - we check that array size  is
26075                     at least N, but we do not check for NAN's or INF's.
26076     Y           -   output buffer, possibly preallocated. In case  buffer
26077                     size is too small to store  result,  this  buffer  is
26078                     automatically resized.
26079 
26080 OUTPUT PARAMETERS
26081     Y           -   array[M], S*x
26082 
26083 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26084 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26085 this function.
26086 
26087   -- ALGLIB PROJECT --
26088      Copyright 14.10.2011 by Bochkanov Sergey
26089 *************************************************************************/
void sparsesmv(sparsematrix* s,
     ae_bool isupper,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* y,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t col;
    ae_int_t lo;
    ae_int_t hi;
    double elem;
    double dotv;
    double acc;
    double xi;
    double diagv;
    ae_int_t r0;
    ae_int_t r1;
    ae_int_t bw;
    ae_int_t uw;
    ae_int_t dst0;
    ae_int_t dst1;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseSMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(x->cnt>=s->n, "SparseSMV: length(X)<N", _state);
    ae_assert(s->m==s->n, "SparseSMV: non-square matrix", _state);
    n = s->n;
    rvectorsetlengthatleast(y, n, _state);

    /*
     * Clear the destination; the product is accumulated into it.
     */
    for(i=0; i<=n-1; i++)
    {
        y->ptr.p_double[i] = (double)(0);
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. For every row I we process the diagonal element (if
         * stored) and then the strictly upper or strictly lower part of the
         * row, depending on IsUpper. Each off-diagonal element S[I,J]
         * contributes twice (symmetry): to Y[I] and to Y[J].
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseSMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Diagonal element of row I is present when DIdx[I]!=UIdx[I].
             */
            if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
            {
                y->ptr.p_double[i] = y->ptr.p_double[i]+s->vals.ptr.p_double[s->didx.ptr.p_int[i]]*x->ptr.p_double[s->idx.ptr.p_int[s->didx.ptr.p_int[i]]];
            }

            /*
             * Select half-open element range [LO,HI) for the requested triangle.
             */
            if( isupper )
            {
                lo = s->uidx.ptr.p_int[i];
                hi = s->ridx.ptr.p_int[i+1];
            }
            else
            {
                lo = s->ridx.ptr.p_int[i];
                hi = s->didx.ptr.p_int[i];
            }
            acc = (double)(0);
            xi = x->ptr.p_double[i];
            for(j=lo; j<=hi-1; j++)
            {
                col = s->idx.ptr.p_int[j];
                elem = s->vals.ptr.p_double[j];
                acc = acc+x->ptr.p_double[col]*elem;                     /* S[I,col]*X[col] -> Y[I]  */
                y->ptr.p_double[col] = y->ptr.p_double[col]+xi*elem;     /* S[col,I]*X[I] -> Y[col]  */
            }
            y->ptr.p_double[i] = y->ptr.p_double[i]+acc;
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: band portions of each row/column are handled with
         * vector kernels; the diagonal element is always stored.
         */
        for(i=0; i<=n-1; i++)
        {
            r0 = s->ridx.ptr.p_int[i];
            r1 = s->ridx.ptr.p_int[i+1];
            bw = s->didx.ptr.p_int[i];     /* count of stored subdiagonal elements of row I    */
            uw = s->uidx.ptr.p_int[i];     /* count of stored superdiagonal elements of col I  */
            diagv = s->vals.ptr.p_double[r0+bw]*x->ptr.p_double[i];
            if( !isupper&&bw>0 )
            {
                lo = r0;
                hi = r0+bw-1;
                dst0 = i-bw;
                dst1 = i-1;
                xi = x->ptr.p_double[i];
                ae_v_addd(&y->ptr.p_double[dst0], 1, &s->vals.ptr.p_double[lo], 1, ae_v_len(dst0,dst1), xi);
                dotv = ae_v_dotproduct(&s->vals.ptr.p_double[lo], 1, &x->ptr.p_double[dst0], 1, ae_v_len(lo,hi));
                diagv = diagv+dotv;
            }
            if( isupper&&uw>0 )
            {
                lo = r1-uw;
                hi = r1-1;
                dst0 = i-uw;
                dst1 = i-1;
                xi = x->ptr.p_double[i];
                ae_v_addd(&y->ptr.p_double[dst0], 1, &s->vals.ptr.p_double[lo], 1, ae_v_len(dst0,dst1), xi);
                dotv = ae_v_dotproduct(&s->vals.ptr.p_double[lo], 1, &x->ptr.p_double[dst0], 1, ae_v_len(lo,hi));
                diagv = diagv+dotv;
            }
            y->ptr.p_double[i] = diagv;
        }
        return;
    }
}
26210 
26211 
26212 /*************************************************************************
26213 This function calculates vector-matrix-vector product x'*S*x, where  S is
26214 symmetric matrix. Matrix S must be stored in CRS or SKS format (exception
26215 will be thrown otherwise).
26216 
26217 INPUT PARAMETERS
26218     S           -   sparse M*M matrix in CRS or SKS format.
26219     IsUpper     -   whether upper or lower triangle of S is given:
26220                     * if upper triangle is given,  only   S[i,j] for j>=i
26221                       are used, and lower triangle is ignored (it can  be
26222                       empty - these elements are not referenced at all).
26223                     * if lower triangle is given,  only   S[i,j] for j<=i
26224                       are used, and upper triangle is ignored.
26225     X           -   array[N], input vector. For  performance  reasons  we
26226                     make only quick checks - we check that array size  is
26227                     at least N, but we do not check for NAN's or INF's.
26228 
26229 RESULT
26230     x'*S*x
26231 
26232 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26233 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26234 this function.
26235 
26236   -- ALGLIB PROJECT --
26237      Copyright 27.01.2014 by Bochkanov Sergey
26238 *************************************************************************/
double sparsevsmv(sparsematrix* s,
     ae_bool isupper,
     /* Real    */ ae_vector* x,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t id;
    ae_int_t lt;
    ae_int_t rt;
    double v;
    double v0;
    double v1;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t d;
    ae_int_t u;
    ae_int_t lt1;
    double result;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseVSMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(x->cnt>=s->n, "SparseVSMV: length(X)<N", _state);
    ae_assert(s->m==s->n, "SparseVSMV: non-square matrix", _state);
    n = s->n;
    result = 0.0;
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. Diagonal elements contribute X[I]*S[I,I]*X[I]; each
         * stored off-diagonal element of the requested triangle contributes
         * twice (2*X[I]*S[I,J]*X[J]) due to symmetry.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseVSMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Diagonal element of row I is present when DIdx[I]!=UIdx[I].
             */
            if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
            {
                v = x->ptr.p_double[s->idx.ptr.p_int[s->didx.ptr.p_int[i]]];
                result = result+v*s->vals.ptr.p_double[s->didx.ptr.p_int[i]]*v;
            }

            /*
             * Select element range [LT,RT) for the requested triangle of row I.
             */
            if( isupper )
            {
                lt = s->uidx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
            }
            else
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->didx.ptr.p_int[i];
            }
            v0 = x->ptr.p_double[i];
            for(j=lt; j<=rt-1; j++)
            {
                id = s->idx.ptr.p_int[j];
                v1 = x->ptr.p_double[id];
                v = s->vals.ptr.p_double[j];
                result = result+2*v0*v1*v;
            }
        }
        return result;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: Vals[RI..RI+D-1] is the subdiagonal part of row I,
         * Vals[RI+D] is the diagonal element, Vals[RI1-U..RI1-1] is the
         * superdiagonal part of column I.
         */
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            v = x->ptr.p_double[i];
            result = result+v*s->vals.ptr.p_double[ri+d]*v;
            if( d>0&&!isupper )
            {

                /*
                 * Fold subdiagonal band of row I into the quadratic form.
                 * (Dead store "rt = ri+d-1" removed - RT is unused here.)
                 */
                lt = ri;
                lt1 = i-d;
                k = d-1;
                v0 = x->ptr.p_double[i];
                v = 0.0;
                for(j=0; j<=k; j++)
                {
                    v = v+x->ptr.p_double[lt1+j]*s->vals.ptr.p_double[lt+j];
                }
                result = result+2*v0*v;
            }
            if( u>0&&isupper )
            {

                /*
                 * Fold superdiagonal band of column I into the quadratic form.
                 * (Dead store "rt = ri1-1" removed - RT is unused here.)
                 */
                lt = ri1-u;
                lt1 = i-u;
                k = u-1;
                v0 = x->ptr.p_double[i];
                v = 0.0;
                for(j=0; j<=k; j++)
                {
                    v = v+x->ptr.p_double[lt1+j]*s->vals.ptr.p_double[lt+j];
                }
                result = result+2*v0*v;
            }
        }
        return result;
    }
    return result;
}
26349 
26350 
26351 /*************************************************************************
26352 This function calculates matrix-matrix product  S*A.  Matrix  S  must  be
26353 stored in CRS or SKS format (exception will be thrown otherwise).
26354 
26355 INPUT PARAMETERS
26356     S           -   sparse M*N matrix in CRS or SKS format.
26357     A           -   array[N][K], input dense matrix. For  performance reasons
26358                     we make only quick checks - we check that array size
26359                     is at least N, but we do not check for NAN's or INF's.
26360     K           -   number of columns of matrix (A).
26361     B           -   output buffer, possibly preallocated. In case  buffer
26362                     size is too small to store  result,  this  buffer  is
26363                     automatically resized.
26364 
26365 OUTPUT PARAMETERS
26366     B           -   array[M][K], S*A
26367 
26368 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26369 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26370 this function.
26371 
26372   -- ALGLIB PROJECT --
26373      Copyright 14.10.2011 by Bochkanov Sergey
26374 *************************************************************************/
void sparsemm(sparsematrix* s,
     /* Real    */ ae_matrix* a,
     ae_int_t k,
     /* Real    */ ae_matrix* b,
     ae_state *_state)
{
    double tval;
    double v;
    ae_int_t id;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k0;
    ae_int_t k1;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t m;
    ae_int_t n;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t lt1;
    ae_int_t rt1;
    ae_int_t d;
    ae_int_t u;
    double vd;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMM: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(a->rows>=s->n, "SparseMM: Rows(A)<N", _state);
    ae_assert(k>0, "SparseMM: K<=0", _state);
    m = s->m;
    n = s->n;
    k1 = k-1;

    /*
     * Size output and clear it; the product is accumulated into B.
     */
    rmatrixsetlengthatleast(b, m, k, _state);
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=k-1; j++)
        {
            b->ptr.pp_double[i][j] = (double)(0);
        }
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. For narrow A (K below the vectorization threshold) a
         * scalar triple loop is used; otherwise whole rows of A are combined
         * with vector kernels.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[m], "SparseMM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        if( k<sparse_linalgswitch )
        {
            for(i=0; i<=m-1; i++)
            {
                for(j=0; j<=k-1; j++)
                {
                    tval = (double)(0);
                    lt = s->ridx.ptr.p_int[i];
                    rt = s->ridx.ptr.p_int[i+1];
                    for(k0=lt; k0<=rt-1; k0++)
                    {
                        tval = tval+s->vals.ptr.p_double[k0]*a->ptr.pp_double[s->idx.ptr.p_int[k0]][j];
                    }
                    b->ptr.pp_double[i][j] = tval;
                }
            }
        }
        else
        {
            for(i=0; i<=m-1; i++)
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
                for(j=lt; j<=rt-1; j++)
                {
                    id = s->idx.ptr.p_int[j];
                    v = s->vals.ptr.p_double[j];
                    ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[id][0], 1, ae_v_len(0,k-1), v);
                }
            }
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: Vals[RI..RI+D-1] is the subdiagonal part of row I,
         * Vals[RI+D] is the diagonal element, Vals[RI1-U..RI1-1] is the
         * superdiagonal part of column I.
         */
        ae_assert(m==n, "SparseMM: non-square SKS matrices are not supported", _state);
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            if( d>0 )
            {

                /*
                 * Subdiagonal band of row I: B[I] += S[I,J]*A[J] for J in
                 * [I-D,I-1]. (Dead store "rt = ri+d-1" removed - unused.)
                 */
                lt = ri;
                lt1 = i-d;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }
            if( u>0 )
            {

                /*
                 * Superdiagonal band of column I: B[J] += S[J,I]*A[I] for J
                 * in [I-U,I-1]. (Dead store "rt = ri1-1" removed - unused.)
                 */
                lt = ri1-u;
                lt1 = i-u;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }

            /*
             * Diagonal element: B[I] += S[I,I]*A[I].
             */
            vd = s->vals.ptr.p_double[ri+d];
            ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), vd);
        }
        return;
    }
}
26534 
26535 
26536 /*************************************************************************
26537 This function calculates matrix-matrix product  S^T*A. Matrix S  must  be
26538 stored in CRS or SKS format (exception will be thrown otherwise).
26539 
26540 INPUT PARAMETERS
26541     S           -   sparse M*N matrix in CRS or SKS format.
26542     A           -   array[M][K], input dense matrix. For performance reasons
26543                     we make only quick checks - we check that array size  is
26544                     at least M, but we do not check for NAN's or INF's.
26545     K           -   number of columns of matrix (A).
26546     B           -   output buffer, possibly preallocated. In case  buffer
26547                     size is too small to store  result,  this  buffer  is
26548                     automatically resized.
26549 
26550 OUTPUT PARAMETERS
26551     B           -   array[N][K], S^T*A
26552 
26553 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26554 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26555 this function.
26556 
26557   -- ALGLIB PROJECT --
26558      Copyright 14.10.2011 by Bochkanov Sergey
26559 *************************************************************************/
void sparsemtm(sparsematrix* s,
     /* Real    */ ae_matrix* a,
     ae_int_t k,
     /* Real    */ ae_matrix* b,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k0;
    ae_int_t k1;
    ae_int_t lt;
    ae_int_t rt;
    ae_int_t ct;
    double v;
    ae_int_t m;
    ae_int_t n;
    ae_int_t ri;
    ae_int_t ri1;
    ae_int_t lt1;
    ae_int_t rt1;
    ae_int_t d;
    ae_int_t u;


    ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMTM: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
    ae_assert(a->rows>=s->m, "SparseMTM: Rows(A)<M", _state);
    ae_assert(k>0, "SparseMTM: K<=0", _state);
    m = s->m;
    n = s->n;
    k1 = k-1;

    /*
     * Size output (N rows, since B = S^T*A) and clear it.
     */
    rmatrixsetlengthatleast(b, n, k, _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=k-1; j++)
        {
            b->ptr.pp_double[i][j] = (double)(0);
        }
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS format. Element S[I,CT] scatters row A[I] into B[CT]; for
         * narrow A a scalar loop is used, otherwise a vector kernel.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[m], "SparseMTM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        if( k<sparse_linalgswitch )
        {
            for(i=0; i<=m-1; i++)
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
                for(k0=lt; k0<=rt-1; k0++)
                {
                    v = s->vals.ptr.p_double[k0];
                    ct = s->idx.ptr.p_int[k0];
                    for(j=0; j<=k-1; j++)
                    {
                        b->ptr.pp_double[ct][j] = b->ptr.pp_double[ct][j]+v*a->ptr.pp_double[i][j];
                    }
                }
            }
        }
        else
        {
            for(i=0; i<=m-1; i++)
            {
                lt = s->ridx.ptr.p_int[i];
                rt = s->ridx.ptr.p_int[i+1];
                for(j=lt; j<=rt-1; j++)
                {
                    v = s->vals.ptr.p_double[j];
                    ct = s->idx.ptr.p_int[j];
                    ae_v_addd(&b->ptr.pp_double[ct][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
                }
            }
        }
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS format: Vals[RI..RI+D-1] is the subdiagonal part of row I,
         * Vals[RI+D] is the diagonal element, Vals[RI1-U..RI1-1] is the
         * superdiagonal part of column I. The transposed product swaps the
         * roles of the two bands relative to SparseMM.
         */
        ae_assert(m==n, "SparseMTM: non-square SKS matrices are not supported", _state);
        for(i=0; i<=n-1; i++)
        {
            ri = s->ridx.ptr.p_int[i];
            ri1 = s->ridx.ptr.p_int[i+1];
            d = s->didx.ptr.p_int[i];
            u = s->uidx.ptr.p_int[i];
            if( d>0 )
            {

                /*
                 * Subdiagonal band of row I scatters A[I] into B[I-D..I-1].
                 * (Dead store "rt = ri+d-1" removed - RT is unused here.)
                 */
                lt = ri;
                lt1 = i-d;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }
            if( u>0 )
            {

                /*
                 * Superdiagonal band of column I gathers A[I-U..I-1] into B[I].
                 * (Dead store "rt = ri1-1" removed - RT is unused here.)
                 */
                lt = ri1-u;
                lt1 = i-u;
                rt1 = i-1;
                for(j=lt1; j<=rt1; j++)
                {
                    v = s->vals.ptr.p_double[lt+(j-lt1)];
                    if( k<sparse_linalgswitch )
                    {

                        /*
                         * Use loop
                         */
                        for(k0=0; k0<=k1; k0++)
                        {
                            b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
                        }
                    }
                    else
                    {

                        /*
                         * Use vector operation
                         */
                        ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
                    }
                }
            }

            /*
             * Diagonal element: B[I] += S[I,I]*A[I].
             */
            v = s->vals.ptr.p_double[ri+d];
            ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
        }
        return;
    }
}
26717 
26718 
26719 /*************************************************************************
26720 This function simultaneously calculates two matrix-matrix products:
26721     S*A and S^T*A.
26722 S  must  be  square (non-rectangular) matrix stored in CRS or  SKS  format
26723 (exception will be thrown otherwise).
26724 
26725 INPUT PARAMETERS
26726     S           -   sparse N*N matrix in CRS or SKS format.
26727     A           -   array[N][K], input dense matrix. For performance reasons
26728                     we make only quick checks - we check that array size  is
26729                     at least N, but we do not check for NAN's or INF's.
26730     K           -   number of columns of matrix (A).
26731     B0          -   output buffer, possibly preallocated. In case  buffer
26732                     size is too small to store  result,  this  buffer  is
26733                     automatically resized.
26734     B1          -   output buffer, possibly preallocated. In case  buffer
26735                     size is too small to store  result,  this  buffer  is
26736                     automatically resized.
26737 
26738 OUTPUT PARAMETERS
26739     B0          -   array[N][K], S*A
26740     B1          -   array[N][K], S^T*A
26741 
26742 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26743 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26744 this function.
26745 
26746   -- ALGLIB PROJECT --
26747      Copyright 14.10.2011 by Bochkanov Sergey
26748 *************************************************************************/
sparsemm2(sparsematrix * s,ae_matrix * a,ae_int_t k,ae_matrix * b0,ae_matrix * b1,ae_state * _state)26749 void sparsemm2(sparsematrix* s,
26750      /* Real    */ ae_matrix* a,
26751      ae_int_t k,
26752      /* Real    */ ae_matrix* b0,
26753      /* Real    */ ae_matrix* b1,
26754      ae_state *_state)
26755 {
26756     ae_int_t i;
26757     ae_int_t j;
26758     ae_int_t k0;
26759     ae_int_t lt;
26760     ae_int_t rt;
26761     ae_int_t ct;
26762     double v;
26763     double tval;
26764     ae_int_t n;
26765     ae_int_t k1;
26766     ae_int_t ri;
26767     ae_int_t ri1;
26768     ae_int_t lt1;
26769     ae_int_t rt1;
26770     ae_int_t d;
26771     ae_int_t u;
26772 
26773 
26774     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseMM2: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
26775     ae_assert(s->m==s->n, "SparseMM2: matrix is non-square", _state);
26776     ae_assert(a->rows>=s->n, "SparseMM2: Rows(A)<N", _state);
26777     ae_assert(k>0, "SparseMM2: K<=0", _state);
26778     n = s->n;
26779     k1 = k-1;
26780     rmatrixsetlengthatleast(b0, n, k, _state);
26781     rmatrixsetlengthatleast(b1, n, k, _state);
26782     for(i=0; i<=n-1; i++)
26783     {
26784         for(j=0; j<=k-1; j++)
26785         {
26786             b1->ptr.pp_double[i][j] = (double)(0);
26787             b0->ptr.pp_double[i][j] = (double)(0);
26788         }
26789     }
26790     if( s->matrixtype==1 )
26791     {
26792 
26793         /*
26794          * CRS format
26795          */
26796         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseMM2: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
26797         if( k<sparse_linalgswitch )
26798         {
26799             for(i=0; i<=n-1; i++)
26800             {
26801                 for(j=0; j<=k-1; j++)
26802                 {
26803                     tval = (double)(0);
26804                     lt = s->ridx.ptr.p_int[i];
26805                     rt = s->ridx.ptr.p_int[i+1];
26806                     v = a->ptr.pp_double[i][j];
26807                     for(k0=lt; k0<=rt-1; k0++)
26808                     {
26809                         ct = s->idx.ptr.p_int[k0];
26810                         b1->ptr.pp_double[ct][j] = b1->ptr.pp_double[ct][j]+s->vals.ptr.p_double[k0]*v;
26811                         tval = tval+s->vals.ptr.p_double[k0]*a->ptr.pp_double[ct][j];
26812                     }
26813                     b0->ptr.pp_double[i][j] = tval;
26814                 }
26815             }
26816         }
26817         else
26818         {
26819             for(i=0; i<=n-1; i++)
26820             {
26821                 lt = s->ridx.ptr.p_int[i];
26822                 rt = s->ridx.ptr.p_int[i+1];
26823                 for(j=lt; j<=rt-1; j++)
26824                 {
26825                     v = s->vals.ptr.p_double[j];
26826                     ct = s->idx.ptr.p_int[j];
26827                     ae_v_addd(&b0->ptr.pp_double[i][0], 1, &a->ptr.pp_double[ct][0], 1, ae_v_len(0,k-1), v);
26828                     ae_v_addd(&b1->ptr.pp_double[ct][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26829                 }
26830             }
26831         }
26832         return;
26833     }
26834     if( s->matrixtype==2 )
26835     {
26836 
26837         /*
26838          * SKS format
26839          */
26840         ae_assert(s->m==s->n, "SparseMM2: non-square SKS matrices are not supported", _state);
26841         for(i=0; i<=n-1; i++)
26842         {
26843             ri = s->ridx.ptr.p_int[i];
26844             ri1 = s->ridx.ptr.p_int[i+1];
26845             d = s->didx.ptr.p_int[i];
26846             u = s->uidx.ptr.p_int[i];
26847             if( d>0 )
26848             {
26849                 lt = ri;
26850                 rt = ri+d-1;
26851                 lt1 = i-d;
26852                 rt1 = i-1;
26853                 for(j=lt1; j<=rt1; j++)
26854                 {
26855                     v = s->vals.ptr.p_double[lt+(j-lt1)];
26856                     if( k<sparse_linalgswitch )
26857                     {
26858 
26859                         /*
26860                          * Use loop
26861                          */
26862                         for(k0=0; k0<=k1; k0++)
26863                         {
26864                             b0->ptr.pp_double[i][k0] = b0->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
26865                             b1->ptr.pp_double[j][k0] = b1->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
26866                         }
26867                     }
26868                     else
26869                     {
26870 
26871                         /*
26872                          * Use vector operation
26873                          */
26874                         ae_v_addd(&b0->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
26875                         ae_v_addd(&b1->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26876                     }
26877                 }
26878             }
26879             if( u>0 )
26880             {
26881                 lt = ri1-u;
26882                 rt = ri1-1;
26883                 lt1 = i-u;
26884                 rt1 = i-1;
26885                 for(j=lt1; j<=rt1; j++)
26886                 {
26887                     v = s->vals.ptr.p_double[lt+(j-lt1)];
26888                     if( k<sparse_linalgswitch )
26889                     {
26890 
26891                         /*
26892                          * Use loop
26893                          */
26894                         for(k0=0; k0<=k1; k0++)
26895                         {
26896                             b0->ptr.pp_double[j][k0] = b0->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
26897                             b1->ptr.pp_double[i][k0] = b1->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
26898                         }
26899                     }
26900                     else
26901                     {
26902 
26903                         /*
26904                          * Use vector operation
26905                          */
26906                         ae_v_addd(&b0->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26907                         ae_v_addd(&b1->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
26908                     }
26909                 }
26910             }
26911             v = s->vals.ptr.p_double[ri+d];
26912             ae_v_addd(&b0->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26913             ae_v_addd(&b1->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
26914         }
26915         return;
26916     }
26917 }
26918 
26919 
26920 /*************************************************************************
26921 This function calculates matrix-matrix product  S*A, when S  is  symmetric
26922 matrix. Matrix S must be stored in CRS or SKS format  (exception  will  be
26923 thrown otherwise).
26924 
26925 INPUT PARAMETERS
26926     S           -   sparse M*M matrix in CRS or SKS format.
26927     IsUpper     -   whether upper or lower triangle of S is given:
26928                     * if upper triangle is given,  only   S[i,j] for j>=i
26929                       are used, and lower triangle is ignored (it can  be
26930                       empty - these elements are not referenced at all).
26931                     * if lower triangle is given,  only   S[i,j] for j<=i
26932                       are used, and upper triangle is ignored.
26933     A           -   array[N][K], input dense matrix. For performance reasons
26934                     we make only quick checks - we check that array size is
26935                     at least N, but we do not check for NAN's or INF's.
26936     K           -   number of columns of matrix (A).
26937     B           -   output buffer, possibly preallocated. In case  buffer
26938                     size is too small to store  result,  this  buffer  is
26939                     automatically resized.
26940 
26941 OUTPUT PARAMETERS
26942     B           -   array[M][K], S*A
26943 
26944 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
26945 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
26946 this function.
26947 
26948   -- ALGLIB PROJECT --
26949      Copyright 14.10.2011 by Bochkanov Sergey
26950 *************************************************************************/
sparsesmm(sparsematrix * s,ae_bool isupper,ae_matrix * a,ae_int_t k,ae_matrix * b,ae_state * _state)26951 void sparsesmm(sparsematrix* s,
26952      ae_bool isupper,
26953      /* Real    */ ae_matrix* a,
26954      ae_int_t k,
26955      /* Real    */ ae_matrix* b,
26956      ae_state *_state)
26957 {
26958     ae_int_t i;
26959     ae_int_t j;
26960     ae_int_t k0;
26961     ae_int_t id;
26962     ae_int_t k1;
26963     ae_int_t lt;
26964     ae_int_t rt;
26965     double v;
26966     double vb;
26967     double va;
26968     ae_int_t n;
26969     ae_int_t ri;
26970     ae_int_t ri1;
26971     ae_int_t lt1;
26972     ae_int_t rt1;
26973     ae_int_t d;
26974     ae_int_t u;
26975 
26976 
26977     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseSMM: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
26978     ae_assert(a->rows>=s->n, "SparseSMM: Rows(X)<N", _state);
26979     ae_assert(s->m==s->n, "SparseSMM: matrix is non-square", _state);
26980     n = s->n;
26981     k1 = k-1;
26982     rmatrixsetlengthatleast(b, n, k, _state);
26983     for(i=0; i<=n-1; i++)
26984     {
26985         for(j=0; j<=k-1; j++)
26986         {
26987             b->ptr.pp_double[i][j] = (double)(0);
26988         }
26989     }
26990     if( s->matrixtype==1 )
26991     {
26992 
26993         /*
26994          * CRS format
26995          */
26996         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseSMM: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
26997         if( k>sparse_linalgswitch )
26998         {
26999             for(i=0; i<=n-1; i++)
27000             {
27001                 for(j=0; j<=k-1; j++)
27002                 {
27003                     if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
27004                     {
27005                         id = s->didx.ptr.p_int[i];
27006                         b->ptr.pp_double[i][j] = b->ptr.pp_double[i][j]+s->vals.ptr.p_double[id]*a->ptr.pp_double[s->idx.ptr.p_int[id]][j];
27007                     }
27008                     if( isupper )
27009                     {
27010                         lt = s->uidx.ptr.p_int[i];
27011                         rt = s->ridx.ptr.p_int[i+1];
27012                         vb = (double)(0);
27013                         va = a->ptr.pp_double[i][j];
27014                         for(k0=lt; k0<=rt-1; k0++)
27015                         {
27016                             id = s->idx.ptr.p_int[k0];
27017                             v = s->vals.ptr.p_double[k0];
27018                             vb = vb+a->ptr.pp_double[id][j]*v;
27019                             b->ptr.pp_double[id][j] = b->ptr.pp_double[id][j]+va*v;
27020                         }
27021                         b->ptr.pp_double[i][j] = b->ptr.pp_double[i][j]+vb;
27022                     }
27023                     else
27024                     {
27025                         lt = s->ridx.ptr.p_int[i];
27026                         rt = s->didx.ptr.p_int[i];
27027                         vb = (double)(0);
27028                         va = a->ptr.pp_double[i][j];
27029                         for(k0=lt; k0<=rt-1; k0++)
27030                         {
27031                             id = s->idx.ptr.p_int[k0];
27032                             v = s->vals.ptr.p_double[k0];
27033                             vb = vb+a->ptr.pp_double[id][j]*v;
27034                             b->ptr.pp_double[id][j] = b->ptr.pp_double[id][j]+va*v;
27035                         }
27036                         b->ptr.pp_double[i][j] = b->ptr.pp_double[i][j]+vb;
27037                     }
27038                 }
27039             }
27040         }
27041         else
27042         {
27043             for(i=0; i<=n-1; i++)
27044             {
27045                 if( s->didx.ptr.p_int[i]!=s->uidx.ptr.p_int[i] )
27046                 {
27047                     id = s->didx.ptr.p_int[i];
27048                     v = s->vals.ptr.p_double[id];
27049                     ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[s->idx.ptr.p_int[id]][0], 1, ae_v_len(0,k-1), v);
27050                 }
27051                 if( isupper )
27052                 {
27053                     lt = s->uidx.ptr.p_int[i];
27054                     rt = s->ridx.ptr.p_int[i+1];
27055                     for(j=lt; j<=rt-1; j++)
27056                     {
27057                         id = s->idx.ptr.p_int[j];
27058                         v = s->vals.ptr.p_double[j];
27059                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[id][0], 1, ae_v_len(0,k-1), v);
27060                         ae_v_addd(&b->ptr.pp_double[id][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27061                     }
27062                 }
27063                 else
27064                 {
27065                     lt = s->ridx.ptr.p_int[i];
27066                     rt = s->didx.ptr.p_int[i];
27067                     for(j=lt; j<=rt-1; j++)
27068                     {
27069                         id = s->idx.ptr.p_int[j];
27070                         v = s->vals.ptr.p_double[j];
27071                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[id][0], 1, ae_v_len(0,k-1), v);
27072                         ae_v_addd(&b->ptr.pp_double[id][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27073                     }
27074                 }
27075             }
27076         }
27077         return;
27078     }
27079     if( s->matrixtype==2 )
27080     {
27081 
27082         /*
27083          * SKS format
27084          */
27085         ae_assert(s->m==s->n, "SparseMM2: non-square SKS matrices are not supported", _state);
27086         for(i=0; i<=n-1; i++)
27087         {
27088             ri = s->ridx.ptr.p_int[i];
27089             ri1 = s->ridx.ptr.p_int[i+1];
27090             d = s->didx.ptr.p_int[i];
27091             u = s->uidx.ptr.p_int[i];
27092             if( d>0&&!isupper )
27093             {
27094                 lt = ri;
27095                 rt = ri+d-1;
27096                 lt1 = i-d;
27097                 rt1 = i-1;
27098                 for(j=lt1; j<=rt1; j++)
27099                 {
27100                     v = s->vals.ptr.p_double[lt+(j-lt1)];
27101                     if( k<sparse_linalgswitch )
27102                     {
27103 
27104                         /*
27105                          * Use loop
27106                          */
27107                         for(k0=0; k0<=k1; k0++)
27108                         {
27109                             b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
27110                             b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
27111                         }
27112                     }
27113                     else
27114                     {
27115 
27116                         /*
27117                          * Use vector operation
27118                          */
27119                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
27120                         ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27121                     }
27122                 }
27123             }
27124             if( u>0&&isupper )
27125             {
27126                 lt = ri1-u;
27127                 rt = ri1-1;
27128                 lt1 = i-u;
27129                 rt1 = i-1;
27130                 for(j=lt1; j<=rt1; j++)
27131                 {
27132                     v = s->vals.ptr.p_double[lt+(j-lt1)];
27133                     if( k<sparse_linalgswitch )
27134                     {
27135 
27136                         /*
27137                          * Use loop
27138                          */
27139                         for(k0=0; k0<=k1; k0++)
27140                         {
27141                             b->ptr.pp_double[j][k0] = b->ptr.pp_double[j][k0]+v*a->ptr.pp_double[i][k0];
27142                             b->ptr.pp_double[i][k0] = b->ptr.pp_double[i][k0]+v*a->ptr.pp_double[j][k0];
27143                         }
27144                     }
27145                     else
27146                     {
27147 
27148                         /*
27149                          * Use vector operation
27150                          */
27151                         ae_v_addd(&b->ptr.pp_double[j][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27152                         ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[j][0], 1, ae_v_len(0,k-1), v);
27153                     }
27154                 }
27155             }
27156             v = s->vals.ptr.p_double[ri+d];
27157             ae_v_addd(&b->ptr.pp_double[i][0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,k-1), v);
27158         }
27159         return;
27160     }
27161 }
27162 
27163 
27164 /*************************************************************************
27165 This function calculates matrix-vector product op(S)*x, when x is  vector,
27166 S is symmetric triangular matrix, op(S) is transposition or no  operation.
27167 Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
27168 otherwise).
27169 
27170 INPUT PARAMETERS
27171     S           -   sparse square matrix in CRS or SKS format.
27172     IsUpper     -   whether upper or lower triangle of S is used:
27173                     * if upper triangle is given,  only   S[i,j] for  j>=i
27174                       are used, and lower triangle is  ignored (it can  be
27175                       empty - these elements are not referenced at all).
27176                     * if lower triangle is given,  only   S[i,j] for  j<=i
27177                       are used, and upper triangle is ignored.
27178     IsUnit      -   unit or non-unit diagonal:
27179                     * if True, diagonal elements of triangular matrix  are
27180                       considered equal to 1.0. Actual elements  stored  in
27181                       S are not referenced at all.
27182                     * if False, diagonal stored in S is used
27183     OpType      -   operation type:
27184                     * if 0, S*x is calculated
27185                     * if 1, (S^T)*x is calculated (transposition)
27186     X           -   array[N] which stores input  vector.  For  performance
27187                     reasons we make only quick  checks  -  we  check  that
27188                     array  size  is  at  least  N, but we do not check for
27189                     NAN's or INF's.
27190     Y           -   possibly  preallocated  input   buffer.  Automatically
27191                     resized if its size is too small.
27192 
27193 OUTPUT PARAMETERS
27194     Y           -   array[N], op(S)*x
27195 
27196 NOTE: this function throws exception when called for non-CRS/SKS  matrix.
27197 You must convert your matrix with SparseConvertToCRS/SKS()  before  using
27198 this function.
27199 
27200   -- ALGLIB PROJECT --
27201      Copyright 20.01.2014 by Bochkanov Sergey
27202 *************************************************************************/
sparsetrmv(sparsematrix * s,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_vector * y,ae_state * _state)27203 void sparsetrmv(sparsematrix* s,
27204      ae_bool isupper,
27205      ae_bool isunit,
27206      ae_int_t optype,
27207      /* Real    */ ae_vector* x,
27208      /* Real    */ ae_vector* y,
27209      ae_state *_state)
27210 {
27211     ae_int_t n;
27212     ae_int_t i;
27213     ae_int_t j;
27214     ae_int_t k;
27215     ae_int_t j0;
27216     ae_int_t j1;
27217     double v;
27218     ae_int_t ri;
27219     ae_int_t ri1;
27220     ae_int_t d;
27221     ae_int_t u;
27222     ae_int_t lt;
27223     ae_int_t rt;
27224     ae_int_t lt1;
27225     ae_int_t rt1;
27226 
27227 
27228     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseTRMV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
27229     ae_assert(optype==0||optype==1, "SparseTRMV: incorrect operation type (must be 0 or 1)", _state);
27230     ae_assert(x->cnt>=s->n, "SparseTRMV: Length(X)<N", _state);
27231     ae_assert(s->m==s->n, "SparseTRMV: matrix is non-square", _state);
27232     n = s->n;
27233     rvectorsetlengthatleast(y, n, _state);
27234     if( isunit )
27235     {
27236 
27237         /*
27238          * Set initial value of y to x
27239          */
27240         for(i=0; i<=n-1; i++)
27241         {
27242             y->ptr.p_double[i] = x->ptr.p_double[i];
27243         }
27244     }
27245     else
27246     {
27247 
27248         /*
27249          * Set initial value of y to 0
27250          */
27251         for(i=0; i<=n-1; i++)
27252         {
27253             y->ptr.p_double[i] = (double)(0);
27254         }
27255     }
27256     if( s->matrixtype==1 )
27257     {
27258 
27259         /*
27260          * CRS format
27261          */
27262         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseTRMV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
27263         for(i=0; i<=n-1; i++)
27264         {
27265 
27266             /*
27267              * Depending on IsUpper/IsUnit, select range of indexes to process
27268              */
27269             if( isupper )
27270             {
27271                 if( isunit||s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27272                 {
27273                     j0 = s->uidx.ptr.p_int[i];
27274                 }
27275                 else
27276                 {
27277                     j0 = s->didx.ptr.p_int[i];
27278                 }
27279                 j1 = s->ridx.ptr.p_int[i+1]-1;
27280             }
27281             else
27282             {
27283                 j0 = s->ridx.ptr.p_int[i];
27284                 if( isunit||s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27285                 {
27286                     j1 = s->didx.ptr.p_int[i]-1;
27287                 }
27288                 else
27289                 {
27290                     j1 = s->didx.ptr.p_int[i];
27291                 }
27292             }
27293 
27294             /*
27295              * Depending on OpType, process subset of I-th row of input matrix
27296              */
27297             if( optype==0 )
27298             {
27299                 v = 0.0;
27300                 for(j=j0; j<=j1; j++)
27301                 {
27302                     v = v+s->vals.ptr.p_double[j]*x->ptr.p_double[s->idx.ptr.p_int[j]];
27303                 }
27304                 y->ptr.p_double[i] = y->ptr.p_double[i]+v;
27305             }
27306             else
27307             {
27308                 v = x->ptr.p_double[i];
27309                 for(j=j0; j<=j1; j++)
27310                 {
27311                     k = s->idx.ptr.p_int[j];
27312                     y->ptr.p_double[k] = y->ptr.p_double[k]+v*s->vals.ptr.p_double[j];
27313                 }
27314             }
27315         }
27316         return;
27317     }
27318     if( s->matrixtype==2 )
27319     {
27320 
27321         /*
27322          * SKS format
27323          */
27324         ae_assert(s->m==s->n, "SparseTRMV: non-square SKS matrices are not supported", _state);
27325         for(i=0; i<=n-1; i++)
27326         {
27327             ri = s->ridx.ptr.p_int[i];
27328             ri1 = s->ridx.ptr.p_int[i+1];
27329             d = s->didx.ptr.p_int[i];
27330             u = s->uidx.ptr.p_int[i];
27331             if( !isunit )
27332             {
27333                 y->ptr.p_double[i] = y->ptr.p_double[i]+s->vals.ptr.p_double[ri+d]*x->ptr.p_double[i];
27334             }
27335             if( d>0&&!isupper )
27336             {
27337                 lt = ri;
27338                 rt = ri+d-1;
27339                 lt1 = i-d;
27340                 rt1 = i-1;
27341                 if( optype==0 )
27342                 {
27343                     v = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
27344                     y->ptr.p_double[i] = y->ptr.p_double[i]+v;
27345                 }
27346                 else
27347                 {
27348                     v = x->ptr.p_double[i];
27349                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
27350                 }
27351             }
27352             if( u>0&&isupper )
27353             {
27354                 lt = ri1-u;
27355                 rt = ri1-1;
27356                 lt1 = i-u;
27357                 rt1 = i-1;
27358                 if( optype==0 )
27359                 {
27360                     v = x->ptr.p_double[i];
27361                     ae_v_addd(&y->ptr.p_double[lt1], 1, &s->vals.ptr.p_double[lt], 1, ae_v_len(lt1,rt1), v);
27362                 }
27363                 else
27364                 {
27365                     v = ae_v_dotproduct(&s->vals.ptr.p_double[lt], 1, &x->ptr.p_double[lt1], 1, ae_v_len(lt,rt));
27366                     y->ptr.p_double[i] = y->ptr.p_double[i]+v;
27367                 }
27368             }
27369         }
27370         return;
27371     }
27372 }
27373 
27374 
/*************************************************************************
This function solves linear system op(S)*y=x  where  x  is  vector,  S  is
symmetric  triangular  matrix,  op(S)  is  transposition  or no operation.
Matrix S must be stored in CRS or SKS format  (exception  will  be  thrown
otherwise).

INPUT PARAMETERS
    S           -   sparse square matrix in CRS or SKS format.
    IsUpper     -   whether upper or lower triangle of S is used:
                    * if upper triangle is given,  only   S[i,j] for  j>=i
                      are used, and lower triangle is  ignored (it can  be
                      empty - these elements are not referenced at all).
                    * if lower triangle is given,  only   S[i,j] for  j<=i
                      are used, and upper triangle is ignored.
    IsUnit      -   unit or non-unit diagonal:
                    * if True, diagonal elements of triangular matrix  are
                      considered equal to 1.0. Actual elements  stored  in
                      S are not referenced at all.
                    * if False, diagonal stored in S is used. It  is  your
                      responsibility  to  make  sure  that   diagonal   is
                      non-zero.
    OpType      -   operation type:
                    * if 0, S*x is calculated
                    * if 1, (S^T)*x is calculated (transposition)
    X           -   array[N] which stores input  vector.  For  performance
                    reasons we make only quick  checks  -  we  check  that
                    array  size  is  at  least  N, but we do not check for
                    NAN's or INF's.

OUTPUT PARAMETERS
    X           -   array[N], inv(op(S))*x

NOTE: this function throws exception when called for  non-CRS/SKS  matrix.
      You must convert your matrix  with  SparseConvertToCRS/SKS()  before
      using this function.

NOTE: no assertion or tests are done during algorithm  operation.   It  is
      your responsibility to provide invertible matrix to algorithm.

  -- ALGLIB PROJECT --
     Copyright 20.01.2014 by Bochkanov Sergey
*************************************************************************/
sparsetrsv(sparsematrix * s,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_state * _state)27417 void sparsetrsv(sparsematrix* s,
27418      ae_bool isupper,
27419      ae_bool isunit,
27420      ae_int_t optype,
27421      /* Real    */ ae_vector* x,
27422      ae_state *_state)
27423 {
27424     ae_int_t n;
27425     ae_int_t fst;
27426     ae_int_t lst;
27427     ae_int_t stp;
27428     ae_int_t i;
27429     ae_int_t j;
27430     ae_int_t k;
27431     double v;
27432     double vd;
27433     double v0;
27434     ae_int_t j0;
27435     ae_int_t j1;
27436     ae_int_t ri;
27437     ae_int_t ri1;
27438     ae_int_t d;
27439     ae_int_t u;
27440     ae_int_t lt;
27441     ae_int_t lt1;
27442 
27443 
27444     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseTRSV: incorrect matrix type (convert your matrix to CRS/SKS)", _state);
27445     ae_assert(optype==0||optype==1, "SparseTRSV: incorrect operation type (must be 0 or 1)", _state);
27446     ae_assert(x->cnt>=s->n, "SparseTRSV: Length(X)<N", _state);
27447     ae_assert(s->m==s->n, "SparseTRSV: matrix is non-square", _state);
27448     n = s->n;
27449     if( s->matrixtype==1 )
27450     {
27451 
27452         /*
27453          * CRS format.
27454          *
27455          * Several branches for different combinations of IsUpper and OpType
27456          */
27457         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseTRSV: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
27458         if( optype==0 )
27459         {
27460 
27461             /*
27462              * No transposition.
27463              *
27464              * S*x=y with upper or lower triangular S.
27465              */
27466             v0 = (double)(0);
27467             if( isupper )
27468             {
27469                 fst = n-1;
27470                 lst = 0;
27471                 stp = -1;
27472             }
27473             else
27474             {
27475                 fst = 0;
27476                 lst = n-1;
27477                 stp = 1;
27478             }
27479             i = fst;
27480             while((stp>0&&i<=lst)||(stp<0&&i>=lst))
27481             {
27482 
27483                 /*
27484                  * Select range of indexes to process
27485                  */
27486                 if( isupper )
27487                 {
27488                     j0 = s->uidx.ptr.p_int[i];
27489                     j1 = s->ridx.ptr.p_int[i+1]-1;
27490                 }
27491                 else
27492                 {
27493                     j0 = s->ridx.ptr.p_int[i];
27494                     j1 = s->didx.ptr.p_int[i]-1;
27495                 }
27496 
27497                 /*
27498                  * Calculate X[I]
27499                  */
27500                 v = 0.0;
27501                 for(j=j0; j<=j1; j++)
27502                 {
27503                     v = v+s->vals.ptr.p_double[j]*x->ptr.p_double[s->idx.ptr.p_int[j]];
27504                 }
27505                 if( !isunit )
27506                 {
27507                     if( s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27508                     {
27509                         vd = (double)(0);
27510                     }
27511                     else
27512                     {
27513                         vd = s->vals.ptr.p_double[s->didx.ptr.p_int[i]];
27514                     }
27515                 }
27516                 else
27517                 {
27518                     vd = 1.0;
27519                 }
27520                 v = (x->ptr.p_double[i]-v)/vd;
27521                 x->ptr.p_double[i] = v;
27522                 v0 = 0.25*v0+v;
27523 
27524                 /*
27525                  * Next I
27526                  */
27527                 i = i+stp;
27528             }
27529             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27530             return;
27531         }
27532         if( optype==1 )
27533         {
27534 
27535             /*
27536              * Transposition.
27537              *
27538              * (S^T)*x=y with upper or lower triangular S.
27539              */
27540             if( isupper )
27541             {
27542                 fst = 0;
27543                 lst = n-1;
27544                 stp = 1;
27545             }
27546             else
27547             {
27548                 fst = n-1;
27549                 lst = 0;
27550                 stp = -1;
27551             }
27552             i = fst;
27553             v0 = (double)(0);
27554             while((stp>0&&i<=lst)||(stp<0&&i>=lst))
27555             {
27556                 v = x->ptr.p_double[i];
27557                 if( v!=0.0 )
27558                 {
27559 
27560                     /*
27561                      * X[i] already stores A[i,i]*Y[i], the only thing left
27562                      * is to divide by diagonal element.
27563                      */
27564                     if( !isunit )
27565                     {
27566                         if( s->didx.ptr.p_int[i]==s->uidx.ptr.p_int[i] )
27567                         {
27568                             vd = (double)(0);
27569                         }
27570                         else
27571                         {
27572                             vd = s->vals.ptr.p_double[s->didx.ptr.p_int[i]];
27573                         }
27574                     }
27575                     else
27576                     {
27577                         vd = 1.0;
27578                     }
27579                     v = v/vd;
27580                     x->ptr.p_double[i] = v;
27581                     v0 = 0.25*v0+v;
27582 
27583                     /*
27584                      * For upper triangular case:
27585                      *     subtract X[i]*Ai from X[i+1:N-1]
27586                      *
27587                      * For lower triangular case:
27588                      *     subtract X[i]*Ai from X[0:i-1]
27589                      *
27590                      * (here Ai is I-th row of original, untransposed A).
27591                      */
27592                     if( isupper )
27593                     {
27594                         j0 = s->uidx.ptr.p_int[i];
27595                         j1 = s->ridx.ptr.p_int[i+1]-1;
27596                     }
27597                     else
27598                     {
27599                         j0 = s->ridx.ptr.p_int[i];
27600                         j1 = s->didx.ptr.p_int[i]-1;
27601                     }
27602                     for(j=j0; j<=j1; j++)
27603                     {
27604                         k = s->idx.ptr.p_int[j];
27605                         x->ptr.p_double[k] = x->ptr.p_double[k]-s->vals.ptr.p_double[j]*v;
27606                     }
27607                 }
27608 
27609                 /*
27610                  * Next I
27611                  */
27612                 i = i+stp;
27613             }
27614             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27615             return;
27616         }
27617         ae_assert(ae_false, "SparseTRSV: internal error", _state);
27618     }
27619     if( s->matrixtype==2 )
27620     {
27621 
27622         /*
27623          * SKS format
27624          */
27625         ae_assert(s->m==s->n, "SparseTRSV: non-square SKS matrices are not supported", _state);
27626         if( (optype==0&&!isupper)||(optype==1&&isupper) )
27627         {
27628 
27629             /*
27630              * Lower triangular op(S) (matrix itself can be upper triangular).
27631              */
27632             v0 = (double)(0);
27633             for(i=0; i<=n-1; i++)
27634             {
27635 
27636                 /*
27637                  * Select range of indexes to process
27638                  */
27639                 ri = s->ridx.ptr.p_int[i];
27640                 ri1 = s->ridx.ptr.p_int[i+1];
27641                 d = s->didx.ptr.p_int[i];
27642                 u = s->uidx.ptr.p_int[i];
27643                 if( isupper )
27644                 {
27645                     lt = i-u;
27646                     lt1 = ri1-u;
27647                     k = u-1;
27648                 }
27649                 else
27650                 {
27651                     lt = i-d;
27652                     lt1 = ri;
27653                     k = d-1;
27654                 }
27655 
27656                 /*
27657                  * Calculate X[I]
27658                  */
27659                 v = 0.0;
27660                 for(j=0; j<=k; j++)
27661                 {
27662                     v = v+s->vals.ptr.p_double[lt1+j]*x->ptr.p_double[lt+j];
27663                 }
27664                 if( isunit )
27665                 {
27666                     vd = (double)(1);
27667                 }
27668                 else
27669                 {
27670                     vd = s->vals.ptr.p_double[ri+d];
27671                 }
27672                 v = (x->ptr.p_double[i]-v)/vd;
27673                 x->ptr.p_double[i] = v;
27674                 v0 = 0.25*v0+v;
27675             }
27676             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27677             return;
27678         }
27679         if( (optype==1&&!isupper)||(optype==0&&isupper) )
27680         {
27681 
27682             /*
27683              * Upper triangular op(S) (matrix itself can be lower triangular).
27684              */
27685             v0 = (double)(0);
27686             for(i=n-1; i>=0; i--)
27687             {
27688                 ri = s->ridx.ptr.p_int[i];
27689                 ri1 = s->ridx.ptr.p_int[i+1];
27690                 d = s->didx.ptr.p_int[i];
27691                 u = s->uidx.ptr.p_int[i];
27692 
27693                 /*
27694                  * X[i] already stores A[i,i]*Y[i], the only thing left
27695                  * is to divide by diagonal element.
27696                  */
27697                 if( isunit )
27698                 {
27699                     vd = (double)(1);
27700                 }
27701                 else
27702                 {
27703                     vd = s->vals.ptr.p_double[ri+d];
27704                 }
27705                 v = x->ptr.p_double[i]/vd;
27706                 x->ptr.p_double[i] = v;
27707                 v0 = 0.25*v0+v;
27708 
27709                 /*
27710                  * Subtract product of X[i] and I-th column of "effective" A from
27711                  * unprocessed variables.
27712                  */
27713                 v = x->ptr.p_double[i];
27714                 if( isupper )
27715                 {
27716                     lt = i-u;
27717                     lt1 = ri1-u;
27718                     k = u-1;
27719                 }
27720                 else
27721                 {
27722                     lt = i-d;
27723                     lt1 = ri;
27724                     k = d-1;
27725                 }
27726                 for(j=0; j<=k; j++)
27727                 {
27728                     x->ptr.p_double[lt+j] = x->ptr.p_double[lt+j]-v*s->vals.ptr.p_double[lt1+j];
27729                 }
27730             }
27731             ae_assert(ae_isfinite(v0, _state), "SparseTRSV: overflow or division by exact zero", _state);
27732             return;
27733         }
27734         ae_assert(ae_false, "SparseTRSV: internal error", _state);
27735     }
27736     ae_assert(ae_false, "SparseTRSV: internal error", _state);
27737 }
27738 
27739 
27740 /*************************************************************************
27741 This function applies permutation given by permutation table P (as opposed
27742 to product form of permutation) to sparse symmetric  matrix  A,  given  by
27743 either upper or lower triangle: B := P*A*P'.
27744 
27745 This function allocates completely new instance of B. Use buffered version
27746 SparseSymmPermTblBuf() if you want to reuse already allocated structure.
27747 
27748 INPUT PARAMETERS
27749     A           -   sparse square matrix in CRS format.
27750     IsUpper     -   whether upper or lower triangle of A is used:
27751                     * if upper triangle is given,  only   A[i,j] for  j>=i
27752                       are used, and lower triangle is  ignored (it can  be
27753                       empty - these elements are not referenced at all).
27754                     * if lower triangle is given,  only   A[i,j] for  j<=i
27755                       are used, and upper triangle is ignored.
    P           -   array[N] which stores the permutation table;  P[I]=J
                    means that the I-th row/column of matrix A  is  moved
                    to the J-th position. For performance reasons  we  do
                    NOT check that P[] is a correct  permutation  (i.e.
                    that there are no repetitions); we only check that
                    all its elements are in the [0,N) range.
27762 
27763 OUTPUT PARAMETERS
27764     B           -   permuted matrix.  Permutation  is  applied  to A  from
27765                     the both sides, only upper or lower triangle (depending
27766                     on IsUpper) is stored.
27767 
27768 NOTE: this function throws exception when called for non-CRS  matrix.  You
27769       must convert your matrix with SparseConvertToCRS() before using this
27770       function.
27771 
27772   -- ALGLIB PROJECT --
27773      Copyright 05.10.2020 by Bochkanov Sergey.
27774 *************************************************************************/
sparsesymmpermtbl(sparsematrix * a,ae_bool isupper,ae_vector * p,sparsematrix * b,ae_state * _state)27775 void sparsesymmpermtbl(sparsematrix* a,
27776      ae_bool isupper,
27777      /* Integer */ ae_vector* p,
27778      sparsematrix* b,
27779      ae_state *_state)
27780 {
27781 
27782     _sparsematrix_clear(b);
27783 
27784     sparsesymmpermtblbuf(a, isupper, p, b, _state);
27785 }
27786 
27787 
27788 /*************************************************************************
27789 This function is a buffered version  of  SparseSymmPermTbl()  that  reuses
27790 previously allocated storage in B as much as possible.
27791 
27792 This function applies permutation given by permutation table P (as opposed
27793 to product form of permutation) to sparse symmetric  matrix  A,  given  by
27794 either upper or lower triangle: B := P*A*P'.
27795 
27796 INPUT PARAMETERS
27797     A           -   sparse square matrix in CRS format.
27798     IsUpper     -   whether upper or lower triangle of A is used:
27799                     * if upper triangle is given,  only   A[i,j] for  j>=i
27800                       are used, and lower triangle is  ignored (it can  be
27801                       empty - these elements are not referenced at all).
27802                     * if lower triangle is given,  only   A[i,j] for  j<=i
27803                       are used, and upper triangle is ignored.
    P           -   array[N] which stores the permutation table;  P[I]=J
                    means that the I-th row/column of matrix A  is  moved
                    to the J-th position. For performance reasons  we  do
                    NOT check that P[] is a correct  permutation  (i.e.
                    that there are no repetitions); we only check that
                    all its elements are in the [0,N) range.
27810     B           -   sparse matrix object that will hold output.
27811                     Previously allocated memory will be reused as much  as
27812                     possible.
27813 
27814 OUTPUT PARAMETERS
27815     B           -   permuted matrix.  Permutation  is  applied  to A  from
27816                     the both sides, only upper or lower triangle (depending
27817                     on IsUpper) is stored.
27818 
27819 NOTE: this function throws exception when called for non-CRS  matrix.  You
27820       must convert your matrix with SparseConvertToCRS() before using this
27821       function.
27822 
27823   -- ALGLIB PROJECT --
27824      Copyright 05.10.2020 by Bochkanov Sergey.
27825 *************************************************************************/
void sparsesymmpermtblbuf(sparsematrix* a,
     ae_bool isupper,
     /* Integer */ ae_vector* p,
     sparsematrix* b,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jj;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t k0;
    ae_int_t k1;
    ae_int_t kk;
    ae_int_t n;
    ae_int_t dst;
    ae_bool bflag;


    /*
     * Validate inputs: CRS storage, Length(P)>=N, square matrix, and
     * every permutation entry within [0,N).  Repetitions in P[] are NOT
     * detected (documented behavior, see function header).
     */
    ae_assert(a->matrixtype==1, "SparseSymmPermTblBuf: incorrect matrix type (convert your matrix to CRS)", _state);
    ae_assert(p->cnt>=a->n, "SparseSymmPermTblBuf: Length(P)<N", _state);
    ae_assert(a->m==a->n, "SparseSymmPermTblBuf: matrix is non-square", _state);
    bflag = ae_true;
    for(i=0; i<=a->n-1; i++)
    {
        bflag = (bflag&&p->ptr.p_int[i]>=0)&&p->ptr.p_int[i]<a->n;
    }
    ae_assert(bflag, "SparseSymmPermTblBuf: P[] contains values outside of [0,N) range", _state);
    n = a->n;

    /*
     * Prepare output
     */
    ae_assert(a->ninitialized==a->ridx.ptr.p_int[n], "SparseSymmPermTblBuf: integrity check failed", _state);
    b->matrixtype = 1;
    b->n = n;
    b->m = n;
    ivectorsetlengthatleast(&b->didx, n, _state);
    ivectorsetlengthatleast(&b->uidx, n, _state);

    /*
     * Determine row sizes (temporary stored in DIdx) and ranges.
     *
     * Each stored element A[i,j] maps to B[P[i],P[j]].  Because only one
     * triangle of B is kept, the element is counted in output row
     * min(P[i],P[j]) for the upper-triangle case and max(P[i],P[j]) for
     * the lower-triangle case - that is the row where the permuted
     * element lands inside the stored triangle.
     */
    isetv(n, 0, &b->didx, _state);
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            /* Upper triangle of row I: from the diagonal to the end of the row */
            j0 = a->didx.ptr.p_int[i];
            j1 = a->ridx.ptr.p_int[i+1]-1;
            k0 = p->ptr.p_int[i];
            for(jj=j0; jj<=j1; jj++)
            {
                k1 = p->ptr.p_int[a->idx.ptr.p_int[jj]];
                if( k1<k0 )
                {
                    b->didx.ptr.p_int[k1] = b->didx.ptr.p_int[k1]+1;
                }
                else
                {
                    b->didx.ptr.p_int[k0] = b->didx.ptr.p_int[k0]+1;
                }
            }
        }
        else
        {
            /* Lower triangle of row I: from the start of the row through the diagonal */
            j0 = a->ridx.ptr.p_int[i];
            j1 = a->uidx.ptr.p_int[i]-1;
            k0 = p->ptr.p_int[i];
            for(jj=j0; jj<=j1; jj++)
            {
                k1 = p->ptr.p_int[a->idx.ptr.p_int[jj]];
                if( k1>k0 )
                {
                    b->didx.ptr.p_int[k1] = b->didx.ptr.p_int[k1]+1;
                }
                else
                {
                    b->didx.ptr.p_int[k0] = b->didx.ptr.p_int[k0]+1;
                }
            }
        }
    }
    /*
     * Convert per-row counts into row ranges (RIdx) via a prefix sum,
     * then allocate storage for the permuted matrix.
     */
    ivectorsetlengthatleast(&b->ridx, n+1, _state);
    b->ridx.ptr.p_int[0] = 0;
    for(i=0; i<=n-1; i++)
    {
        b->ridx.ptr.p_int[i+1] = b->ridx.ptr.p_int[i]+b->didx.ptr.p_int[i];
    }
    b->ninitialized = b->ridx.ptr.p_int[n];
    ivectorsetlengthatleast(&b->idx, b->ninitialized, _state);
    rvectorsetlengthatleast(&b->vals, b->ninitialized, _state);

    /*
     * Process matrix.
     *
     * UIdx[k] is temporarily used as a write cursor: it points to the
     * next free slot inside output row k.  Elements are emitted in
     * arbitrary column order within each row; they are sorted in the
     * finalization pass below.
     */
    for(i=0; i<=n-1; i++)
    {
        b->uidx.ptr.p_int[i] = b->ridx.ptr.p_int[i];
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j0 = a->didx.ptr.p_int[i];
            j1 = a->ridx.ptr.p_int[i+1]-1;
            for(jj=j0; jj<=j1; jj++)
            {
                j = a->idx.ptr.p_int[jj];
                k0 = p->ptr.p_int[i];
                k1 = p->ptr.p_int[j];
                /* store at (min,max) so the element stays in the upper triangle */
                if( k1<k0 )
                {
                    kk = k0;
                    k0 = k1;
                    k1 = kk;
                }
                dst = b->uidx.ptr.p_int[k0];
                b->idx.ptr.p_int[dst] = k1;
                b->vals.ptr.p_double[dst] = a->vals.ptr.p_double[jj];
                b->uidx.ptr.p_int[k0] = dst+1;
            }
        }
        else
        {
            j0 = a->ridx.ptr.p_int[i];
            j1 = a->uidx.ptr.p_int[i]-1;
            for(jj=j0; jj<=j1; jj++)
            {
                j = a->idx.ptr.p_int[jj];
                k0 = p->ptr.p_int[i];
                k1 = p->ptr.p_int[j];
                /* store at (max,min) so the element stays in the lower triangle */
                if( k1>k0 )
                {
                    kk = k0;
                    k0 = k1;
                    k1 = kk;
                }
                dst = b->uidx.ptr.p_int[k0];
                b->idx.ptr.p_int[dst] = k1;
                b->vals.ptr.p_double[dst] = a->vals.ptr.p_double[jj];
                b->uidx.ptr.p_int[k0] = dst+1;
            }
        }
    }

    /*
     * Finalize matrix: sort column indices within each row and rebuild
     * the real DIdx/UIdx structure of the output.
     */
    for(i=0; i<=n-1; i++)
    {
        tagsortmiddleir(&b->idx, &b->vals, b->ridx.ptr.p_int[i], b->ridx.ptr.p_int[i+1]-b->ridx.ptr.p_int[i], _state);
    }
    sparseinitduidx(b, _state);
}
27981 
27982 
27983 /*************************************************************************
27984 This procedure resizes Hash-Table matrix. It can be called when you  have
27985 deleted too many elements from the matrix, and you want to  free unneeded
27986 memory.
27987 
27988   -- ALGLIB PROJECT --
27989      Copyright 14.10.2011 by Bochkanov Sergey
27990 *************************************************************************/
sparseresizematrix(sparsematrix * s,ae_state * _state)27991 void sparseresizematrix(sparsematrix* s, ae_state *_state)
27992 {
27993     ae_frame _frame_block;
27994     ae_int_t k;
27995     ae_int_t k1;
27996     ae_int_t i;
27997     ae_vector tvals;
27998     ae_vector tidx;
27999 
28000     ae_frame_make(_state, &_frame_block);
28001     memset(&tvals, 0, sizeof(tvals));
28002     memset(&tidx, 0, sizeof(tidx));
28003     ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);
28004     ae_vector_init(&tidx, 0, DT_INT, _state, ae_true);
28005 
28006     ae_assert(s->matrixtype==0, "SparseResizeMatrix: incorrect matrix type", _state);
28007 
28008     /*
28009      * Initialization for length and number of non-null elementd
28010      */
28011     k = s->tablesize;
28012     k1 = 0;
28013 
28014     /*
28015      * Calculating number of non-null elements
28016      */
28017     for(i=0; i<=k-1; i++)
28018     {
28019         if( s->idx.ptr.p_int[2*i]>=0 )
28020         {
28021             k1 = k1+1;
28022         }
28023     }
28024 
28025     /*
28026      * Initialization value for free space
28027      */
28028     s->tablesize = ae_round(k1/sparse_desiredloadfactor*sparse_growfactor+sparse_additional, _state);
28029     s->nfree = s->tablesize-k1;
28030     ae_vector_set_length(&tvals, s->tablesize, _state);
28031     ae_vector_set_length(&tidx, 2*s->tablesize, _state);
28032     ae_swap_vectors(&s->vals, &tvals);
28033     ae_swap_vectors(&s->idx, &tidx);
28034     for(i=0; i<=s->tablesize-1; i++)
28035     {
28036         s->idx.ptr.p_int[2*i] = -1;
28037     }
28038     for(i=0; i<=k-1; i++)
28039     {
28040         if( tidx.ptr.p_int[2*i]>=0 )
28041         {
28042             sparseset(s, tidx.ptr.p_int[2*i], tidx.ptr.p_int[2*i+1], tvals.ptr.p_double[i], _state);
28043         }
28044     }
28045     ae_frame_leave(_state);
28046 }
28047 
28048 
28049 /*************************************************************************
28050 Procedure for initialization 'S.DIdx' and 'S.UIdx'
28051 
28052 
28053   -- ALGLIB PROJECT --
28054      Copyright 14.10.2011 by Bochkanov Sergey
28055 *************************************************************************/
sparseinitduidx(sparsematrix * s,ae_state * _state)28056 void sparseinitduidx(sparsematrix* s, ae_state *_state)
28057 {
28058     ae_int_t i;
28059     ae_int_t j;
28060     ae_int_t k;
28061     ae_int_t lt;
28062     ae_int_t rt;
28063 
28064 
28065     ae_assert(s->matrixtype==1, "SparseInitDUIdx: internal error, incorrect matrix type", _state);
28066     ivectorsetlengthatleast(&s->didx, s->m, _state);
28067     ivectorsetlengthatleast(&s->uidx, s->m, _state);
28068     for(i=0; i<=s->m-1; i++)
28069     {
28070         s->uidx.ptr.p_int[i] = -1;
28071         s->didx.ptr.p_int[i] = -1;
28072         lt = s->ridx.ptr.p_int[i];
28073         rt = s->ridx.ptr.p_int[i+1];
28074         for(j=lt; j<=rt-1; j++)
28075         {
28076             k = s->idx.ptr.p_int[j];
28077             if( k==i )
28078             {
28079                 s->didx.ptr.p_int[i] = j;
28080             }
28081             else
28082             {
28083                 if( k>i&&s->uidx.ptr.p_int[i]==-1 )
28084                 {
28085                     s->uidx.ptr.p_int[i] = j;
28086                     break;
28087                 }
28088             }
28089         }
28090         if( s->uidx.ptr.p_int[i]==-1 )
28091         {
28092             s->uidx.ptr.p_int[i] = s->ridx.ptr.p_int[i+1];
28093         }
28094         if( s->didx.ptr.p_int[i]==-1 )
28095         {
28096             s->didx.ptr.p_int[i] = s->uidx.ptr.p_int[i];
28097         }
28098     }
28099 }
28100 
28101 
28102 /*************************************************************************
This function returns the average length of a chain in the hash-table.
28104 
28105   -- ALGLIB PROJECT --
28106      Copyright 14.10.2011 by Bochkanov Sergey
28107 *************************************************************************/
sparsegetaveragelengthofchain(sparsematrix * s,ae_state * _state)28108 double sparsegetaveragelengthofchain(sparsematrix* s, ae_state *_state)
28109 {
28110     ae_int_t nchains;
28111     ae_int_t talc;
28112     ae_int_t l;
28113     ae_int_t i;
28114     ae_int_t ind0;
28115     ae_int_t ind1;
28116     ae_int_t hashcode;
28117     double result;
28118 
28119 
28120 
28121     /*
28122      * If matrix represent in CRS then return zero and exit
28123      */
28124     if( s->matrixtype!=0 )
28125     {
28126         result = (double)(0);
28127         return result;
28128     }
28129     nchains = 0;
28130     talc = 0;
28131     l = s->tablesize;
28132     for(i=0; i<=l-1; i++)
28133     {
28134         ind0 = 2*i;
28135         if( s->idx.ptr.p_int[ind0]!=-1 )
28136         {
28137             nchains = nchains+1;
28138             hashcode = sparse_hash(s->idx.ptr.p_int[ind0], s->idx.ptr.p_int[ind0+1], l, _state);
28139             for(;;)
28140             {
28141                 talc = talc+1;
28142                 ind1 = 2*hashcode;
28143                 if( s->idx.ptr.p_int[ind0]==s->idx.ptr.p_int[ind1]&&s->idx.ptr.p_int[ind0+1]==s->idx.ptr.p_int[ind1+1] )
28144                 {
28145                     break;
28146                 }
28147                 hashcode = (hashcode+1)%l;
28148             }
28149         }
28150     }
28151     if( nchains==0 )
28152     {
28153         result = (double)(0);
28154     }
28155     else
28156     {
28157         result = (double)talc/(double)nchains;
28158     }
28159     return result;
28160 }
28161 
28162 
28163 /*************************************************************************
28164 This  function  is  used  to enumerate all elements of the sparse matrix.
28165 Before  first  call  user  initializes  T0 and T1 counters by zero. These
28166 counters are used to remember current position in a  matrix;  after  each
28167 call they are updated by the function.
28168 
28169 Subsequent calls to this function return non-zero elements of the  sparse
28170 matrix, one by one. If you enumerate CRS matrix, matrix is traversed from
28171 left to right, from top to bottom. In case you enumerate matrix stored as
28172 Hash table, elements are returned in random order.
28173 
28174 EXAMPLE
28175     > T0=0
28176     > T1=0
28177     > while SparseEnumerate(S,T0,T1,I,J,V) do
28178     >     ....do something with I,J,V
28179 
28180 INPUT PARAMETERS
28181     S           -   sparse M*N matrix in Hash-Table or CRS representation.
28182     T0          -   internal counter
28183     T1          -   internal counter
28184 
28185 OUTPUT PARAMETERS
28186     T0          -   new value of the internal counter
28187     T1          -   new value of the internal counter
28188     I           -   row index of non-zero element, 0<=I<M.
28189     J           -   column index of non-zero element, 0<=J<N
28190     V           -   value of the T-th element
28191 
28192 RESULT
28193     True in case of success (next non-zero element was retrieved)
28194     False in case all non-zero elements were enumerated
28195 
28196 NOTE: you may call SparseRewriteExisting() during enumeration, but it  is
28197       THE  ONLY  matrix  modification  function  you  can  call!!!  Other
28198       matrix modification functions should not be called during enumeration!
28199 
28200   -- ALGLIB PROJECT --
28201      Copyright 14.03.2012 by Bochkanov Sergey
28202 *************************************************************************/
ae_bool sparseenumerate(sparsematrix* s,
     ae_int_t* t0,
     ae_int_t* t1,
     ae_int_t* i,
     ae_int_t* j,
     double* v,
     ae_state *_state)
{
    ae_int_t sz;
    ae_int_t i0;
    ae_bool result;

    *i = 0;
    *j = 0;
    *v = 0;

    result = ae_false;
    if( *t0<0||(s->matrixtype!=0&&*t1<0) )
    {

        /*
         * Incorrect T0/T1, terminate enumeration
         */
        result = ae_false;
        return result;
    }
    if( s->matrixtype==0 )
    {

        /*
         * Hash-table matrix.
         *
         * T0 is the slot index to resume from; T1 is unused for this
         * format.  Slots whose row index is -1 or -2 are skipped
         * (presumably empty vs. deleted markers - both denote unused
         * slots here).
         */
        sz = s->tablesize;
        for(i0=*t0; i0<=sz-1; i0++)
        {
            if( s->idx.ptr.p_int[2*i0]==-1||s->idx.ptr.p_int[2*i0]==-2 )
            {
                continue;
            }
            else
            {
                *i = s->idx.ptr.p_int[2*i0];
                *j = s->idx.ptr.p_int[2*i0+1];
                *v = s->vals.ptr.p_double[i0];
                *t0 = i0+1;
                result = ae_true;
                return result;
            }
        }
        /* table exhausted: reset counters so the next call restarts */
        *t0 = 0;
        *t1 = 0;
        result = ae_false;
        return result;
    }
    if( s->matrixtype==1 )
    {

        /*
         * CRS matrix.
         *
         * T0 is the flat offset into Idx[]/Vals[]; T1 is the current row
         * index, advanced until T0 falls inside row T1's range.
         */
        ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseEnumerate: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
        if( *t0>=s->ninitialized )
        {
            *t0 = 0;
            *t1 = 0;
            result = ae_false;
            return result;
        }
        while(*t0>s->ridx.ptr.p_int[*t1+1]-1&&*t1<s->m)
        {
            *t1 = *t1+1;
        }
        *i = *t1;
        *j = s->idx.ptr.p_int[*t0];
        *v = s->vals.ptr.p_double[*t0];
        *t0 = *t0+1;
        result = ae_true;
        return result;
    }
    if( s->matrixtype==2 )
    {

        /*
         * SKS matrix:
         * * T0 stores current offset in Vals[] array
         * * T1 stores index of the diagonal block
         */
        ae_assert(s->m==s->n, "SparseEnumerate: non-square SKS matrices are not supported", _state);
        if( *t0>=s->ridx.ptr.p_int[s->m] )
        {
            *t0 = 0;
            *t1 = 0;
            result = ae_false;
            return result;
        }
        while(*t0>s->ridx.ptr.p_int[*t1+1]-1&&*t1<s->m)
        {
            *t1 = *t1+1;
        }
        /* I0 is the offset of the element within block T1 */
        i0 = *t0-s->ridx.ptr.p_int[*t1];
        if( i0<s->didx.ptr.p_int[*t1]+1 )
        {

            /*
             * subdiagonal or diagonal element, row index is T1.
             */
            *i = *t1;
            *j = *t1-s->didx.ptr.p_int[*t1]+i0;
        }
        else
        {

            /*
             * superdiagonal element, column index is T1.
             */
            *i = *t1-(s->ridx.ptr.p_int[*t1+1]-(*t0));
            *j = *t1;
        }
        *v = s->vals.ptr.p_double[*t0];
        *t0 = *t0+1;
        result = ae_true;
        return result;
    }
    ae_assert(ae_false, "SparseEnumerate: unexpected matrix type", _state);
    return result;
}
28329 
28330 
28331 /*************************************************************************
28332 This function rewrites existing (non-zero) element. It  returns  True   if
28333 element  exists  or  False,  when  it  is  called for non-existing  (zero)
28334 element.
28335 
28336 This function works with any kind of the matrix.
28337 
28338 The purpose of this function is to provide convenient thread-safe  way  to
28339 modify  sparse  matrix.  Such  modification  (already  existing element is
28340 rewritten) is guaranteed to be thread-safe without any synchronization, as
28341 long as different threads modify different elements.
28342 
28343 INPUT PARAMETERS
28344     S           -   sparse M*N matrix in any kind of representation
28345                     (Hash, SKS, CRS).
28346     I           -   row index of non-zero element to modify, 0<=I<M
28347     J           -   column index of non-zero element to modify, 0<=J<N
28348     V           -   value to rewrite, must be finite number
28349 
28350 OUTPUT PARAMETERS
28351     S           -   modified matrix
28352 RESULT
28353     True in case when element exists
28354     False in case when element doesn't exist or it is zero
28355 
28356   -- ALGLIB PROJECT --
28357      Copyright 14.03.2012 by Bochkanov Sergey
28358 *************************************************************************/
sparserewriteexisting(sparsematrix * s,ae_int_t i,ae_int_t j,double v,ae_state * _state)28359 ae_bool sparserewriteexisting(sparsematrix* s,
28360      ae_int_t i,
28361      ae_int_t j,
28362      double v,
28363      ae_state *_state)
28364 {
28365     ae_int_t hashcode;
28366     ae_int_t k;
28367     ae_int_t k0;
28368     ae_int_t k1;
28369     ae_bool result;
28370 
28371 
28372     ae_assert(0<=i&&i<s->m, "SparseRewriteExisting: invalid argument I(either I<0 or I>=S.M)", _state);
28373     ae_assert(0<=j&&j<s->n, "SparseRewriteExisting: invalid argument J(either J<0 or J>=S.N)", _state);
28374     ae_assert(ae_isfinite(v, _state), "SparseRewriteExisting: invalid argument V(either V is infinite or V is NaN)", _state);
28375     result = ae_false;
28376 
28377     /*
28378      * Hash-table matrix
28379      */
28380     if( s->matrixtype==0 )
28381     {
28382         k = s->tablesize;
28383         hashcode = sparse_hash(i, j, k, _state);
28384         for(;;)
28385         {
28386             if( s->idx.ptr.p_int[2*hashcode]==-1 )
28387             {
28388                 return result;
28389             }
28390             if( s->idx.ptr.p_int[2*hashcode]==i&&s->idx.ptr.p_int[2*hashcode+1]==j )
28391             {
28392                 s->vals.ptr.p_double[hashcode] = v;
28393                 result = ae_true;
28394                 return result;
28395             }
28396             hashcode = (hashcode+1)%k;
28397         }
28398     }
28399 
28400     /*
28401      * CRS matrix
28402      */
28403     if( s->matrixtype==1 )
28404     {
28405         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseRewriteExisting: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
28406         k0 = s->ridx.ptr.p_int[i];
28407         k1 = s->ridx.ptr.p_int[i+1]-1;
28408         while(k0<=k1)
28409         {
28410             k = (k0+k1)/2;
28411             if( s->idx.ptr.p_int[k]==j )
28412             {
28413                 s->vals.ptr.p_double[k] = v;
28414                 result = ae_true;
28415                 return result;
28416             }
28417             if( s->idx.ptr.p_int[k]<j )
28418             {
28419                 k0 = k+1;
28420             }
28421             else
28422             {
28423                 k1 = k-1;
28424             }
28425         }
28426     }
28427 
28428     /*
28429      * SKS
28430      */
28431     if( s->matrixtype==2 )
28432     {
28433         ae_assert(s->m==s->n, "SparseRewriteExisting: non-square SKS matrix not supported", _state);
28434         if( i==j )
28435         {
28436 
28437             /*
28438              * Rewrite diagonal element
28439              */
28440             result = ae_true;
28441             s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+s->didx.ptr.p_int[i]] = v;
28442             return result;
28443         }
28444         if( j<i )
28445         {
28446 
28447             /*
28448              * Return subdiagonal element at I-th "skyline block"
28449              */
28450             k = s->didx.ptr.p_int[i];
28451             if( i-j<=k )
28452             {
28453                 s->vals.ptr.p_double[s->ridx.ptr.p_int[i]+k+j-i] = v;
28454                 result = ae_true;
28455             }
28456         }
28457         else
28458         {
28459 
28460             /*
28461              * Return superdiagonal element at J-th "skyline block"
28462              */
28463             k = s->uidx.ptr.p_int[j];
28464             if( j-i<=k )
28465             {
28466                 s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)] = v;
28467                 result = ae_true;
28468             }
28469         }
28470         return result;
28471     }
28472     return result;
28473 }
28474 
28475 
28476 /*************************************************************************
28477 This function returns I-th row of the sparse matrix. Matrix must be stored
28478 in CRS or SKS format.
28479 
28480 INPUT PARAMETERS:
28481     S           -   sparse M*N matrix in CRS format
28482     I           -   row index, 0<=I<M
28483     IRow        -   output buffer, can be  preallocated.  In  case  buffer
28484                     size  is  too  small  to  store  I-th   row,   it   is
28485                     automatically reallocated.
28486 
28487 OUTPUT PARAMETERS:
28488     IRow        -   array[M], I-th row.
28489 
NOTE: this function has O(N) running time, where N is a  column  count. It
      allocates and fills N-element  array,  even  though  most   of   its
      elements are zero.
28493 
28494 NOTE: If you have O(non-zeros-per-row) time and memory  requirements,  use
28495       SparseGetCompressedRow() function. It  returns  data  in  compressed
28496       format.
28497 
28498 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
28499       is passed, this function throws exception.
28500 
28501   -- ALGLIB PROJECT --
28502      Copyright 10.12.2014 by Bochkanov Sergey
28503 *************************************************************************/
sparsegetrow(sparsematrix * s,ae_int_t i,ae_vector * irow,ae_state * _state)28504 void sparsegetrow(sparsematrix* s,
28505      ae_int_t i,
28506      /* Real    */ ae_vector* irow,
28507      ae_state *_state)
28508 {
28509     ae_int_t i0;
28510     ae_int_t j0;
28511     ae_int_t j1;
28512     ae_int_t j;
28513     ae_int_t upperprofile;
28514 
28515 
28516     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseGetRow: S must be CRS/SKS-based matrix", _state);
28517     ae_assert(i>=0&&i<s->m, "SparseGetRow: I<0 or I>=M", _state);
28518 
28519     /*
28520      * Prepare output buffer
28521      */
28522     rvectorsetlengthatleast(irow, s->n, _state);
28523     for(i0=0; i0<=s->n-1; i0++)
28524     {
28525         irow->ptr.p_double[i0] = (double)(0);
28526     }
28527 
28528     /*
28529      * Output
28530      */
28531     if( s->matrixtype==1 )
28532     {
28533         for(i0=s->ridx.ptr.p_int[i]; i0<=s->ridx.ptr.p_int[i+1]-1; i0++)
28534         {
28535             irow->ptr.p_double[s->idx.ptr.p_int[i0]] = s->vals.ptr.p_double[i0];
28536         }
28537         return;
28538     }
28539     if( s->matrixtype==2 )
28540     {
28541 
28542         /*
28543          * Copy subdiagonal and diagonal parts
28544          */
28545         ae_assert(s->n==s->m, "SparseGetRow: non-square SKS matrices are not supported", _state);
28546         j0 = i-s->didx.ptr.p_int[i];
28547         i0 = -j0+s->ridx.ptr.p_int[i];
28548         for(j=j0; j<=i; j++)
28549         {
28550             irow->ptr.p_double[j] = s->vals.ptr.p_double[j+i0];
28551         }
28552 
28553         /*
28554          * Copy superdiagonal part
28555          */
28556         upperprofile = s->uidx.ptr.p_int[s->n];
28557         j0 = i+1;
28558         j1 = ae_minint(s->n-1, i+upperprofile, _state);
28559         for(j=j0; j<=j1; j++)
28560         {
28561             if( j-i<=s->uidx.ptr.p_int[j] )
28562             {
28563                 irow->ptr.p_double[j] = s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)];
28564             }
28565         }
28566         return;
28567     }
28568 }
28569 
28570 
28571 /*************************************************************************
28572 This function returns I-th row of the sparse matrix IN COMPRESSED FORMAT -
28573 only non-zero elements are returned (with their indexes). Matrix  must  be
28574 stored in CRS or SKS format.
28575 
28576 INPUT PARAMETERS:
28577     S           -   sparse M*N matrix in CRS format
28578     I           -   row index, 0<=I<M
28579     ColIdx      -   output buffer for column indexes, can be preallocated.
28580                     In case buffer size is too small to store I-th row, it
28581                     is automatically reallocated.
28582     Vals        -   output buffer for values, can be preallocated. In case
28583                     buffer size is too small to  store  I-th  row,  it  is
28584                     automatically reallocated.
28585 
28586 OUTPUT PARAMETERS:
28587     ColIdx      -   column   indexes   of  non-zero  elements,  sorted  by
28588                     ascending. Symbolically non-zero elements are  counted
28589                     (i.e. if you allocated place for element, but  it  has
28590                     zero numerical value - it is counted).
28591     Vals        -   values. Vals[K] stores value of  matrix  element  with
28592                     indexes (I,ColIdx[K]). Symbolically non-zero  elements
28593                     are counted (i.e. if you allocated place for  element,
28594                     but it has zero numerical value - it is counted).
28595     NZCnt       -   number of symbolically non-zero elements per row.
28596 
28597 NOTE: when  incorrect  I  (outside  of  [0,M-1]) or  matrix (non  CRS/SKS)
28598       is passed, this function throws exception.
28599 
28600 NOTE: this function may allocate additional, unnecessary place for  ColIdx
28601       and Vals arrays. It is dictated by  performance  reasons  -  on  SKS
28602       matrices it is faster  to  allocate  space  at  the  beginning  with
28603       some "extra"-space, than performing two passes over matrix  -  first
28604       time to calculate exact space required for data, second  time  -  to
28605       store data itself.
28606 
28607   -- ALGLIB PROJECT --
28608      Copyright 10.12.2014 by Bochkanov Sergey
28609 *************************************************************************/
sparsegetcompressedrow(sparsematrix * s,ae_int_t i,ae_vector * colidx,ae_vector * vals,ae_int_t * nzcnt,ae_state * _state)28610 void sparsegetcompressedrow(sparsematrix* s,
28611      ae_int_t i,
28612      /* Integer */ ae_vector* colidx,
28613      /* Real    */ ae_vector* vals,
28614      ae_int_t* nzcnt,
28615      ae_state *_state)
28616 {
28617     ae_int_t k;
28618     ae_int_t k0;
28619     ae_int_t j;
28620     ae_int_t j0;
28621     ae_int_t j1;
28622     ae_int_t i0;
28623     ae_int_t upperprofile;
28624 
28625     *nzcnt = 0;
28626 
28627     ae_assert(s->matrixtype==1||s->matrixtype==2, "SparseGetRow: S must be CRS/SKS-based matrix", _state);
28628     ae_assert(i>=0&&i<s->m, "SparseGetRow: I<0 or I>=M", _state);
28629 
28630     /*
28631      * Initialize NZCnt
28632      */
28633     *nzcnt = 0;
28634 
28635     /*
28636      * CRS matrix - just copy data
28637      */
28638     if( s->matrixtype==1 )
28639     {
28640         *nzcnt = s->ridx.ptr.p_int[i+1]-s->ridx.ptr.p_int[i];
28641         ivectorsetlengthatleast(colidx, *nzcnt, _state);
28642         rvectorsetlengthatleast(vals, *nzcnt, _state);
28643         k0 = s->ridx.ptr.p_int[i];
28644         for(k=0; k<=*nzcnt-1; k++)
28645         {
28646             colidx->ptr.p_int[k] = s->idx.ptr.p_int[k0+k];
28647             vals->ptr.p_double[k] = s->vals.ptr.p_double[k0+k];
28648         }
28649         return;
28650     }
28651 
28652     /*
28653      * SKS matrix - a bit more complex sequence
28654      */
28655     if( s->matrixtype==2 )
28656     {
28657         ae_assert(s->n==s->m, "SparseGetCompressedRow: non-square SKS matrices are not supported", _state);
28658 
28659         /*
28660          * Allocate enough place for storage
28661          */
28662         upperprofile = s->uidx.ptr.p_int[s->n];
28663         ivectorsetlengthatleast(colidx, s->didx.ptr.p_int[i]+1+upperprofile, _state);
28664         rvectorsetlengthatleast(vals, s->didx.ptr.p_int[i]+1+upperprofile, _state);
28665 
28666         /*
28667          * Copy subdiagonal and diagonal parts
28668          */
28669         j0 = i-s->didx.ptr.p_int[i];
28670         i0 = -j0+s->ridx.ptr.p_int[i];
28671         for(j=j0; j<=i; j++)
28672         {
28673             colidx->ptr.p_int[*nzcnt] = j;
28674             vals->ptr.p_double[*nzcnt] = s->vals.ptr.p_double[j+i0];
28675             *nzcnt = *nzcnt+1;
28676         }
28677 
28678         /*
28679          * Copy superdiagonal part
28680          */
28681         j0 = i+1;
28682         j1 = ae_minint(s->n-1, i+upperprofile, _state);
28683         for(j=j0; j<=j1; j++)
28684         {
28685             if( j-i<=s->uidx.ptr.p_int[j] )
28686             {
28687                 colidx->ptr.p_int[*nzcnt] = j;
28688                 vals->ptr.p_double[*nzcnt] = s->vals.ptr.p_double[s->ridx.ptr.p_int[j+1]-(j-i)];
28689                 *nzcnt = *nzcnt+1;
28690             }
28691         }
28692         return;
28693     }
28694 }
28695 
28696 
28697 /*************************************************************************
28698 This function performs efficient in-place  transpose  of  SKS  matrix.  No
28699 additional memory is allocated during transposition.
28700 
28701 This function supports only skyline storage format (SKS).
28702 
28703 INPUT PARAMETERS
28704     S       -   sparse matrix in SKS format.
28705 
28706 OUTPUT PARAMETERS
28707     S           -   sparse matrix, transposed.
28708 
28709   -- ALGLIB PROJECT --
28710      Copyright 16.01.2014 by Bochkanov Sergey
28711 *************************************************************************/
void sparsetransposesks(sparsematrix* s, ae_state *_state)
{
    ae_int_t n;
    ae_int_t d;
    ae_int_t u;
    ae_int_t i;
    ae_int_t k;
    ae_int_t t0;
    ae_int_t t1;
    double v;


    ae_assert(s->matrixtype==2, "SparseTransposeSKS: only SKS matrices are supported", _state);
    ae_assert(s->m==s->n, "SparseTransposeSKS: non-square SKS matrices are not supported", _state);
    n = s->n;
    for(i=1; i<=n-1; i++)
    {

        /*
         * D/U are the lower/upper skyline heights of the I-th row.
         * Transposition exchanges their roles, so DIdx[I] and UIdx[I]
         * are swapped; the stored values are rearranged below.
         */
        d = s->didx.ptr.p_int[i];
        u = s->uidx.ptr.p_int[i];
        k = s->uidx.ptr.p_int[i];
        s->uidx.ptr.p_int[i] = s->didx.ptr.p_int[i];
        s->didx.ptr.p_int[i] = k;
        if( d==u )
        {

            /*
             * Upper skyline height equal to lower skyline height,
             * simple exchange is needed for transposition
             */
            t0 = s->ridx.ptr.p_int[i];
            for(k=0; k<=d-1; k++)
            {
                v = s->vals.ptr.p_double[t0+k];
                s->vals.ptr.p_double[t0+k] = s->vals.ptr.p_double[t0+d+1+k];
                s->vals.ptr.p_double[t0+d+1+k] = v;
            }
        }
        if( d>u )
        {

            /*
             * Upper skyline height is less than lower skyline height.
             *
             * Transposition becomes a bit tricky: we have to rearrange
             * "L0 L1 D U" to "U D L0 L1", where |L0|=|U|=u, |L1|=d-u.
             *
             * In order to do this we perform a sequence of swaps and
             * in-place reversals:
             * * swap(L0,U)         =>  "U   L1  D   L0"
             * * reverse("L1 D L0") =>  "U   L0~ D   L1~" (where X~ is a reverse of X)
             * * reverse("L0~ D")   =>  "U   D   L0  L1~"
             * * reverse("L1")      =>  "U   D   L0  L1"
             */

            /* Step 1: swap(L0,U) - exchange first u elements with the trailing u */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i]+d+1;
            for(k=0; k<=u-1; k++)
            {
                v = s->vals.ptr.p_double[t0+k];
                s->vals.ptr.p_double[t0+k] = s->vals.ptr.p_double[t1+k];
                s->vals.ptr.p_double[t1+k] = v;
            }

            /* Step 2: reverse("L1 D L0") - everything after the leading U */
            t0 = s->ridx.ptr.p_int[i]+u;
            t1 = s->ridx.ptr.p_int[i+1]-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 3: reverse("L0~ D") - u+1 elements starting at offset u */
            t0 = s->ridx.ptr.p_int[i]+u;
            t1 = s->ridx.ptr.p_int[i]+u+u;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 4: reverse("L1~") - trailing d-u elements */
            t0 = s->ridx.ptr.p_int[i+1]-(d-u);
            t1 = s->ridx.ptr.p_int[i+1]-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }
        }
        if( d<u )
        {

            /*
             * Upper skyline height is greater than lower skyline height.
             *
             * Transposition becomes a bit tricky: we have to rearrange
             * "L D U0 U1" to "U0 U1 D L", where |U1|=|L|=d, |U0|=u-d.
             *
             * In order to do this we perform a sequence of swaps and
             * in-place reversals:
             * * swap(L,U1)         =>  "U1  D   U0  L"
             * * reverse("U1 D U0") =>  "U0~ D   U1~ L" (where X~ is a reverse of X)
             * * reverse("U0~")     =>  "U0  D   U1~ L"
             * * reverse("D U1~")   =>  "U0  U1  D   L"
             */

            /* Step 1: swap(L,U1) - exchange first d elements with the trailing d */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i+1]-d;
            for(k=0; k<=d-1; k++)
            {
                v = s->vals.ptr.p_double[t0+k];
                s->vals.ptr.p_double[t0+k] = s->vals.ptr.p_double[t1+k];
                s->vals.ptr.p_double[t1+k] = v;
            }

            /* Step 2: reverse("U1 D U0") - leading u+1 elements */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i]+u;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 3: reverse("U0~") - leading u-d elements */
            t0 = s->ridx.ptr.p_int[i];
            t1 = s->ridx.ptr.p_int[i]+u-d-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }

            /* Step 4: reverse("D U1~") - d+1 elements starting at offset u-d */
            t0 = s->ridx.ptr.p_int[i]+u-d;
            t1 = s->ridx.ptr.p_int[i+1]-d-1;
            while(t1>t0)
            {
                v = s->vals.ptr.p_double[t0];
                s->vals.ptr.p_double[t0] = s->vals.ptr.p_double[t1];
                s->vals.ptr.p_double[t1] = v;
                t0 = t0+1;
                t1 = t1-1;
            }
        }
    }

    /*
     * Swap the trailing DIdx[N]/UIdx[N] entries too: UIdx[N] is used
     * elsewhere in this file as the matrix-wide upper profile width
     * (see its use as "upperprofile" in SparseGetRow), and the
     * lower/upper roles are exchanged by transposition.
     */
    k = s->uidx.ptr.p_int[n];
    s->uidx.ptr.p_int[n] = s->didx.ptr.p_int[n];
    s->didx.ptr.p_int[n] = k;
}
28864 
28865 
28866 /*************************************************************************
28867 This function performs transpose of CRS matrix.
28868 
28869 INPUT PARAMETERS
28870     S       -   sparse matrix in CRS format.
28871 
28872 OUTPUT PARAMETERS
28873     S           -   sparse matrix, transposed.
28874 
28875 NOTE: internal  temporary  copy  is  allocated   for   the   purposes   of
28876       transposition. It is deallocated after transposition.
28877 
28878   -- ALGLIB PROJECT --
28879      Copyright 30.01.2018 by Bochkanov Sergey
28880 *************************************************************************/
sparsetransposecrs(sparsematrix * s,ae_state * _state)28881 void sparsetransposecrs(sparsematrix* s, ae_state *_state)
28882 {
28883     ae_frame _frame_block;
28884     ae_vector oldvals;
28885     ae_vector oldidx;
28886     ae_vector oldridx;
28887     ae_int_t oldn;
28888     ae_int_t oldm;
28889     ae_int_t newn;
28890     ae_int_t newm;
28891     ae_int_t i;
28892     ae_int_t j;
28893     ae_int_t k;
28894     ae_int_t nonne;
28895     ae_vector counts;
28896 
28897     ae_frame_make(_state, &_frame_block);
28898     memset(&oldvals, 0, sizeof(oldvals));
28899     memset(&oldidx, 0, sizeof(oldidx));
28900     memset(&oldridx, 0, sizeof(oldridx));
28901     memset(&counts, 0, sizeof(counts));
28902     ae_vector_init(&oldvals, 0, DT_REAL, _state, ae_true);
28903     ae_vector_init(&oldidx, 0, DT_INT, _state, ae_true);
28904     ae_vector_init(&oldridx, 0, DT_INT, _state, ae_true);
28905     ae_vector_init(&counts, 0, DT_INT, _state, ae_true);
28906 
28907     ae_assert(s->matrixtype==1, "SparseTransposeCRS: only CRS matrices are supported", _state);
28908     ae_swap_vectors(&s->vals, &oldvals);
28909     ae_swap_vectors(&s->idx, &oldidx);
28910     ae_swap_vectors(&s->ridx, &oldridx);
28911     oldn = s->n;
28912     oldm = s->m;
28913     newn = oldm;
28914     newm = oldn;
28915 
28916     /*
28917      * Update matrix size
28918      */
28919     s->n = newn;
28920     s->m = newm;
28921 
28922     /*
28923      * Fill RIdx by number of elements per row:
28924      * RIdx[I+1] stores number of elements in I-th row.
28925      *
28926      * Convert RIdx from row sizes to row offsets.
28927      * Set NInitialized
28928      */
28929     nonne = 0;
28930     ivectorsetlengthatleast(&s->ridx, newm+1, _state);
28931     for(i=0; i<=newm; i++)
28932     {
28933         s->ridx.ptr.p_int[i] = 0;
28934     }
28935     for(i=0; i<=oldm-1; i++)
28936     {
28937         for(j=oldridx.ptr.p_int[i]; j<=oldridx.ptr.p_int[i+1]-1; j++)
28938         {
28939             k = oldidx.ptr.p_int[j]+1;
28940             s->ridx.ptr.p_int[k] = s->ridx.ptr.p_int[k]+1;
28941             nonne = nonne+1;
28942         }
28943     }
28944     for(i=0; i<=newm-1; i++)
28945     {
28946         s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i+1]+s->ridx.ptr.p_int[i];
28947     }
28948     s->ninitialized = s->ridx.ptr.p_int[newm];
28949 
28950     /*
28951      * Allocate memory and move elements to Vals/Idx.
28952      */
28953     ae_vector_set_length(&counts, newm, _state);
28954     for(i=0; i<=newm-1; i++)
28955     {
28956         counts.ptr.p_int[i] = 0;
28957     }
28958     rvectorsetlengthatleast(&s->vals, nonne, _state);
28959     ivectorsetlengthatleast(&s->idx, nonne, _state);
28960     for(i=0; i<=oldm-1; i++)
28961     {
28962         for(j=oldridx.ptr.p_int[i]; j<=oldridx.ptr.p_int[i+1]-1; j++)
28963         {
28964             k = oldidx.ptr.p_int[j];
28965             k = s->ridx.ptr.p_int[k]+counts.ptr.p_int[k];
28966             s->idx.ptr.p_int[k] = i;
28967             s->vals.ptr.p_double[k] = oldvals.ptr.p_double[j];
28968             k = oldidx.ptr.p_int[j];
28969             counts.ptr.p_int[k] = counts.ptr.p_int[k]+1;
28970         }
28971     }
28972 
28973     /*
28974      * Initialization 'S.UIdx' and 'S.DIdx'
28975      */
28976     sparseinitduidx(s, _state);
28977     ae_frame_leave(_state);
28978 }
28979 
28980 
28981 /*************************************************************************
28982 This function performs copying with transposition of CRS matrix.
28983 
28984 INPUT PARAMETERS
28985     S0      -   sparse matrix in CRS format.
28986 
28987 OUTPUT PARAMETERS
28988     S1      -   sparse matrix, transposed
28989 
28990   -- ALGLIB PROJECT --
28991      Copyright 23.07.2018 by Bochkanov Sergey
28992 *************************************************************************/
sparsecopytransposecrs(sparsematrix * s0,sparsematrix * s1,ae_state * _state)28993 void sparsecopytransposecrs(sparsematrix* s0,
28994      sparsematrix* s1,
28995      ae_state *_state)
28996 {
28997 
28998     _sparsematrix_clear(s1);
28999 
29000     sparsecopytransposecrsbuf(s0, s1, _state);
29001 }
29002 
29003 
29004 /*************************************************************************
29005 This function performs copying with transposition of CRS matrix  (buffered
29006 version which reuses memory already allocated by  the  target as  much  as
29007 possible).
29008 
29009 INPUT PARAMETERS
29010     S0      -   sparse matrix in CRS format.
29011 
29012 OUTPUT PARAMETERS
29013     S1      -   sparse matrix, transposed; previously allocated memory  is
29014                 reused if possible.
29015 
29016   -- ALGLIB PROJECT --
29017      Copyright 23.07.2018 by Bochkanov Sergey
29018 *************************************************************************/
sparsecopytransposecrsbuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29019 void sparsecopytransposecrsbuf(sparsematrix* s0,
29020      sparsematrix* s1,
29021      ae_state *_state)
29022 {
29023     ae_int_t oldn;
29024     ae_int_t oldm;
29025     ae_int_t newn;
29026     ae_int_t newm;
29027     ae_int_t i;
29028     ae_int_t j;
29029     ae_int_t k;
29030     ae_int_t kk;
29031     ae_int_t j0;
29032     ae_int_t j1;
29033 
29034 
29035     ae_assert(s0->matrixtype==1, "SparseCopyTransposeCRSBuf: only CRS matrices are supported", _state);
29036     oldn = s0->n;
29037     oldm = s0->m;
29038     newn = oldm;
29039     newm = oldn;
29040 
29041     /*
29042      * Update matrix size
29043      */
29044     s1->matrixtype = 1;
29045     s1->n = newn;
29046     s1->m = newm;
29047 
29048     /*
29049      * Fill RIdx by number of elements per row:
29050      * RIdx[I+1] stores number of elements in I-th row.
29051      *
29052      * Convert RIdx from row sizes to row offsets.
29053      * Set NInitialized
29054      */
29055     isetallocv(newm+1, 0, &s1->ridx, _state);
29056     for(i=0; i<=oldm-1; i++)
29057     {
29058         j0 = s0->ridx.ptr.p_int[i];
29059         j1 = s0->ridx.ptr.p_int[i+1]-1;
29060         for(j=j0; j<=j1; j++)
29061         {
29062             k = s0->idx.ptr.p_int[j]+1;
29063             s1->ridx.ptr.p_int[k] = s1->ridx.ptr.p_int[k]+1;
29064         }
29065     }
29066     for(i=0; i<=newm-1; i++)
29067     {
29068         s1->ridx.ptr.p_int[i+1] = s1->ridx.ptr.p_int[i+1]+s1->ridx.ptr.p_int[i];
29069     }
29070     s1->ninitialized = s1->ridx.ptr.p_int[newm];
29071 
29072     /*
29073      * Allocate memory and move elements to Vals/Idx.
29074      */
29075     ivectorsetlengthatleast(&s1->didx, newm, _state);
29076     for(i=0; i<=newm-1; i++)
29077     {
29078         s1->didx.ptr.p_int[i] = s1->ridx.ptr.p_int[i];
29079     }
29080     rvectorsetlengthatleast(&s1->vals, s1->ninitialized, _state);
29081     ivectorsetlengthatleast(&s1->idx, s1->ninitialized, _state);
29082     for(i=0; i<=oldm-1; i++)
29083     {
29084         j0 = s0->ridx.ptr.p_int[i];
29085         j1 = s0->ridx.ptr.p_int[i+1]-1;
29086         for(j=j0; j<=j1; j++)
29087         {
29088             kk = s0->idx.ptr.p_int[j];
29089             k = s1->didx.ptr.p_int[kk];
29090             s1->idx.ptr.p_int[k] = i;
29091             s1->vals.ptr.p_double[k] = s0->vals.ptr.p_double[j];
29092             s1->didx.ptr.p_int[kk] = k+1;
29093         }
29094     }
29095 
29096     /*
29097      * Initialization 'S.UIdx' and 'S.DIdx'
29098      */
29099     sparseinitduidx(s1, _state);
29100 }
29101 
29102 
29103 /*************************************************************************
29104 This  function  performs  in-place  conversion  to  desired sparse storage
29105 format.
29106 
29107 INPUT PARAMETERS
29108     S0      -   sparse matrix in any format.
29109     Fmt     -   desired storage format  of  the  output,  as  returned  by
29110                 SparseGetMatrixType() function:
29111                 * 0 for hash-based storage
29112                 * 1 for CRS
29113                 * 2 for SKS
29114 
29115 OUTPUT PARAMETERS
29116     S0          -   sparse matrix in requested format.
29117 
29118 NOTE: in-place conversion wastes a lot of memory which is  used  to  store
29119       temporaries.  If  you  perform  a  lot  of  repeated conversions, we
29120       recommend to use out-of-place buffered  conversion  functions,  like
29121       SparseCopyToBuf(), which can reuse already allocated memory.
29122 
29123   -- ALGLIB PROJECT --
29124      Copyright 16.01.2014 by Bochkanov Sergey
29125 *************************************************************************/
sparseconvertto(sparsematrix * s0,ae_int_t fmt,ae_state * _state)29126 void sparseconvertto(sparsematrix* s0, ae_int_t fmt, ae_state *_state)
29127 {
29128 
29129 
29130     ae_assert((fmt==0||fmt==1)||fmt==2, "SparseConvertTo: invalid fmt parameter", _state);
29131     if( fmt==0 )
29132     {
29133         sparseconverttohash(s0, _state);
29134         return;
29135     }
29136     if( fmt==1 )
29137     {
29138         sparseconverttocrs(s0, _state);
29139         return;
29140     }
29141     if( fmt==2 )
29142     {
29143         sparseconverttosks(s0, _state);
29144         return;
29145     }
29146     ae_assert(ae_false, "SparseConvertTo: invalid matrix type", _state);
29147 }
29148 
29149 
29150 /*************************************************************************
29151 This  function  performs out-of-place conversion to desired sparse storage
29152 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
29153 S1 is reused to maximum extent possible.
29154 
29155 INPUT PARAMETERS
29156     S0      -   sparse matrix in any format.
29157     Fmt     -   desired storage format  of  the  output,  as  returned  by
29158                 SparseGetMatrixType() function:
29159                 * 0 for hash-based storage
29160                 * 1 for CRS
29161                 * 2 for SKS
29162 
29163 OUTPUT PARAMETERS
29164     S1          -   sparse matrix in requested format.
29165 
29166   -- ALGLIB PROJECT --
29167      Copyright 16.01.2014 by Bochkanov Sergey
29168 *************************************************************************/
sparsecopytobuf(sparsematrix * s0,ae_int_t fmt,sparsematrix * s1,ae_state * _state)29169 void sparsecopytobuf(sparsematrix* s0,
29170      ae_int_t fmt,
29171      sparsematrix* s1,
29172      ae_state *_state)
29173 {
29174 
29175 
29176     ae_assert((fmt==0||fmt==1)||fmt==2, "SparseCopyToBuf: invalid fmt parameter", _state);
29177     if( fmt==0 )
29178     {
29179         sparsecopytohashbuf(s0, s1, _state);
29180         return;
29181     }
29182     if( fmt==1 )
29183     {
29184         sparsecopytocrsbuf(s0, s1, _state);
29185         return;
29186     }
29187     if( fmt==2 )
29188     {
29189         sparsecopytosksbuf(s0, s1, _state);
29190         return;
29191     }
29192     ae_assert(ae_false, "SparseCopyToBuf: invalid matrix type", _state);
29193 }
29194 
29195 
29196 /*************************************************************************
29197 This function performs in-place conversion to Hash table storage.
29198 
29199 INPUT PARAMETERS
29200     S           -   sparse matrix in CRS format.
29201 
29202 OUTPUT PARAMETERS
29203     S           -   sparse matrix in Hash table format.
29204 
29205 NOTE: this  function  has   no  effect  when  called with matrix which  is
29206       already in Hash table mode.
29207 
NOTE: in-place conversion involves allocation of temporary arrays. If  you
      perform a lot of repeated in-place  conversions,  it  may  lead   to
      memory fragmentation. Consider using out-of-place SparseCopyToHashBuf()
      function in this case.
29212 
29213   -- ALGLIB PROJECT --
29214      Copyright 20.07.2012 by Bochkanov Sergey
29215 *************************************************************************/
void sparseconverttohash(sparsematrix* s, ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector tidx;   /* holds old S.Idx  (CRS column indices) after swap   */
    ae_vector tridx;  /* holds old S.RIdx (row offsets) after swap          */
    ae_vector tdidx;  /* holds old S.DIdx (SKS subdiagonal band widths)     */
    ae_vector tuidx;  /* holds old S.UIdx (SKS superdiagonal band widths)   */
    ae_vector tvals;  /* holds old S.Vals (element values) after swap       */
    ae_int_t n;
    ae_int_t m;
    ae_int_t offs0;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;

    ae_frame_make(_state, &_frame_block);
    memset(&tidx, 0, sizeof(tidx));
    memset(&tridx, 0, sizeof(tridx));
    memset(&tdidx, 0, sizeof(tdidx));
    memset(&tuidx, 0, sizeof(tuidx));
    memset(&tvals, 0, sizeof(tvals));
    ae_vector_init(&tidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tridx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tdidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tuidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);

    ae_assert((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2, "SparseConvertToHash: invalid matrix type", _state);
    if( s->matrixtype==0 )
    {

        /*
         * Already in Hash mode
         */
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==1 )
    {

        /*
         * From CRS to Hash.
         *
         * The CRS arrays are swapped into local temporaries, the matrix is
         * re-created in Hash mode with capacity RIdx[M] (the number of stored
         * elements), and every element is re-inserted via SparseSet().
         */
        s->matrixtype = 0;
        m = s->m;
        n = s->n;
        ae_swap_vectors(&s->idx, &tidx);
        ae_swap_vectors(&s->ridx, &tridx);
        ae_swap_vectors(&s->vals, &tvals);
        sparsecreatebuf(m, n, tridx.ptr.p_int[m], s, _state);
        for(i=0; i<=m-1; i++)
        {
            /* row I occupies positions [RIdx[I], RIdx[I+1]) of Idx/Vals */
            for(j=tridx.ptr.p_int[i]; j<=tridx.ptr.p_int[i+1]-1; j++)
            {
                sparseset(s, i, tidx.ptr.p_int[j], tvals.ptr.p_double[j], _state);
            }
        }
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==2 )
    {

        /*
         * From SKS to Hash: same swap-and-reinsert approach, but the I-th
         * block of Vals stores DIdx[I] subdiagonal elements, the diagonal
         * element, and UIdx[I] superdiagonal elements of column I.
         */
        s->matrixtype = 0;
        m = s->m;
        n = s->n;
        ae_swap_vectors(&s->ridx, &tridx);
        ae_swap_vectors(&s->didx, &tdidx);
        ae_swap_vectors(&s->uidx, &tuidx);
        ae_swap_vectors(&s->vals, &tvals);
        sparsecreatebuf(m, n, tridx.ptr.p_int[m], s, _state);
        for(i=0; i<=m-1; i++)
        {

            /*
             * copy subdiagonal and diagonal parts of I-th block:
             * row I, columns I-DIdx[I] .. I
             */
            offs0 = tridx.ptr.p_int[i];
            k = tdidx.ptr.p_int[i]+1;
            for(j=0; j<=k-1; j++)
            {
                sparseset(s, i, i-tdidx.ptr.p_int[i]+j, tvals.ptr.p_double[offs0+j], _state);
            }

            /*
             * Copy superdiagonal part of I-th block:
             * column I, rows I-UIdx[I] .. I-1
             */
            offs0 = tridx.ptr.p_int[i]+tdidx.ptr.p_int[i]+1;
            k = tuidx.ptr.p_int[i];
            for(j=0; j<=k-1; j++)
            {
                sparseset(s, i-k+j, i, tvals.ptr.p_double[offs0+j], _state);
            }
        }
        ae_frame_leave(_state);
        return;
    }
    /* unreachable given the assertion above; defensive check */
    ae_assert(ae_false, "SparseConvertToHash: invalid matrix type", _state);
    ae_frame_leave(_state);
}
29319 
29320 
29321 /*************************************************************************
29322 This  function  performs  out-of-place  conversion  to  Hash table storage
29323 format. S0 is copied to S1 and converted on-the-fly.
29324 
29325 INPUT PARAMETERS
29326     S0          -   sparse matrix in any format.
29327 
29328 OUTPUT PARAMETERS
29329     S1          -   sparse matrix in Hash table format.
29330 
29331 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
29332 
29333 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
29334       conversion. If you perform a  lot  of  repeated  conversions, it may
29335       lead to memory fragmentation. In this case we recommend you  to  use
29336       SparseCopyToHashBuf() function which re-uses memory in S1 as much as
29337       possible.
29338 
29339   -- ALGLIB PROJECT --
29340      Copyright 20.07.2012 by Bochkanov Sergey
29341 *************************************************************************/
sparsecopytohash(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29342 void sparsecopytohash(sparsematrix* s0,
29343      sparsematrix* s1,
29344      ae_state *_state)
29345 {
29346 
29347     _sparsematrix_clear(s1);
29348 
29349     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToHash: invalid matrix type", _state);
29350     sparsecopytohashbuf(s0, s1, _state);
29351 }
29352 
29353 
29354 /*************************************************************************
29355 This  function  performs  out-of-place  conversion  to  Hash table storage
29356 format. S0 is copied to S1 and converted on-the-fly. Memory  allocated  in
29357 S1 is reused to maximum extent possible.
29358 
29359 INPUT PARAMETERS
29360     S0          -   sparse matrix in any format.
29361 
29362 OUTPUT PARAMETERS
29363     S1          -   sparse matrix in Hash table format.
29364 
29365 NOTE: if S0 is stored as Hash-table, it is just copied without conversion.
29366 
29367   -- ALGLIB PROJECT --
29368      Copyright 20.07.2012 by Bochkanov Sergey
29369 *************************************************************************/
sparsecopytohashbuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29370 void sparsecopytohashbuf(sparsematrix* s0,
29371      sparsematrix* s1,
29372      ae_state *_state)
29373 {
29374     double val;
29375     ae_int_t t0;
29376     ae_int_t t1;
29377     ae_int_t i;
29378     ae_int_t j;
29379 
29380 
29381     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToHashBuf: invalid matrix type", _state);
29382     if( s0->matrixtype==0 )
29383     {
29384 
29385         /*
29386          * Already hash, just copy
29387          */
29388         sparsecopybuf(s0, s1, _state);
29389         return;
29390     }
29391     if( s0->matrixtype==1 )
29392     {
29393 
29394         /*
29395          * CRS storage
29396          */
29397         t0 = 0;
29398         t1 = 0;
29399         sparsecreatebuf(s0->m, s0->n, s0->ridx.ptr.p_int[s0->m], s1, _state);
29400         while(sparseenumerate(s0, &t0, &t1, &i, &j, &val, _state))
29401         {
29402             sparseset(s1, i, j, val, _state);
29403         }
29404         return;
29405     }
29406     if( s0->matrixtype==2 )
29407     {
29408 
29409         /*
29410          * SKS storage
29411          */
29412         t0 = 0;
29413         t1 = 0;
29414         sparsecreatebuf(s0->m, s0->n, s0->ridx.ptr.p_int[s0->m], s1, _state);
29415         while(sparseenumerate(s0, &t0, &t1, &i, &j, &val, _state))
29416         {
29417             sparseset(s1, i, j, val, _state);
29418         }
29419         return;
29420     }
29421     ae_assert(ae_false, "SparseCopyToHashBuf: invalid matrix type", _state);
29422 }
29423 
29424 
29425 /*************************************************************************
29426 This function converts matrix to CRS format.
29427 
29428 Some  algorithms  (linear  algebra ones, for example) require matrices in
29429 CRS format. This function allows to perform in-place conversion.
29430 
29431 INPUT PARAMETERS
29432     S           -   sparse M*N matrix in any format
29433 
29434 OUTPUT PARAMETERS
29435     S           -   matrix in CRS format
29436 
29437 NOTE: this   function  has  no  effect  when  called with matrix which is
29438       already in CRS mode.
29439 
29440 NOTE: this function allocates temporary memory to store a   copy  of  the
29441       matrix. If you perform a lot of repeated conversions, we  recommend
29442       you  to  use  SparseCopyToCRSBuf()  function,   which   can   reuse
29443       previously allocated memory.
29444 
29445   -- ALGLIB PROJECT --
29446      Copyright 14.10.2011 by Bochkanov Sergey
29447 *************************************************************************/
void sparseconverttocrs(sparsematrix* s, ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t m;
    ae_int_t i;
    ae_int_t j;
    ae_vector tvals;  /* local copy of element values                        */
    ae_vector tidx;   /* local copy of Hash index table / SKS column indices */
    ae_vector temp;   /* per-row insertion counters during the scatter pass  */
    ae_vector tridx;  /* local copy of SKS row offsets                       */
    ae_int_t nonne;   /* total number of stored (non-empty) elements         */
    ae_int_t k;
    ae_int_t offs0;
    ae_int_t offs1;

    ae_frame_make(_state, &_frame_block);
    memset(&tvals, 0, sizeof(tvals));
    memset(&tidx, 0, sizeof(tidx));
    memset(&temp, 0, sizeof(temp));
    memset(&tridx, 0, sizeof(tridx));
    ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&temp, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tridx, 0, DT_INT, _state, ae_true);

    m = s->m;
    if( s->matrixtype==0 )
    {

        /*
         * From Hash-table to CRS.
         * First, create local copy of the hash table.
         */
        s->matrixtype = 1;
        k = s->tablesize;
        ae_swap_vectors(&s->vals, &tvals);
        ae_swap_vectors(&s->idx, &tidx);

        /*
         * Fill RIdx by number of elements per row:
         * RIdx[I+1] stores number of elements in I-th row.
         *
         * Convert RIdx from row sizes to row offsets.
         * Set NInitialized
         */
        nonne = 0;
        ivectorsetlengthatleast(&s->ridx, s->m+1, _state);
        for(i=0; i<=s->m; i++)
        {
            s->ridx.ptr.p_int[i] = 0;
        }
        for(i=0; i<=k-1; i++)
        {
            /* hash slot I is occupied iff its row index (Idx[2*I]) is >=0;
               Idx[2*I+1] is the column index */
            if( tidx.ptr.p_int[2*i]>=0 )
            {
                s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]+1] = s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]+1]+1;
                nonne = nonne+1;
            }
        }
        /* prefix sum: row sizes -> row offsets */
        for(i=0; i<=s->m-1; i++)
        {
            s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i+1]+s->ridx.ptr.p_int[i];
        }
        s->ninitialized = s->ridx.ptr.p_int[s->m];

        /*
         * Allocate memory and move elements to Vals/Idx.
         * Initially, elements are sorted by rows, but unsorted within row.
         * After initial insertion we sort elements within row.
         */
        ae_vector_set_length(&temp, s->m, _state);
        for(i=0; i<=s->m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }
        rvectorsetlengthatleast(&s->vals, nonne, _state);
        ivectorsetlengthatleast(&s->idx, nonne, _state);
        for(i=0; i<=k-1; i++)
        {
            if( tidx.ptr.p_int[2*i]>=0 )
            {
                /* scatter into position RIdx[row]+Temp[row], then advance Temp[row] */
                s->vals.ptr.p_double[s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]]+temp.ptr.p_int[tidx.ptr.p_int[2*i]]] = tvals.ptr.p_double[i];
                s->idx.ptr.p_int[s->ridx.ptr.p_int[tidx.ptr.p_int[2*i]]+temp.ptr.p_int[tidx.ptr.p_int[2*i]]] = tidx.ptr.p_int[2*i+1];
                temp.ptr.p_int[tidx.ptr.p_int[2*i]] = temp.ptr.p_int[tidx.ptr.p_int[2*i]]+1;
            }
        }
        /* sort each row by column index, keeping Vals in sync */
        for(i=0; i<=s->m-1; i++)
        {
            tagsortmiddleir(&s->idx, &s->vals, s->ridx.ptr.p_int[i], s->ridx.ptr.p_int[i+1]-s->ridx.ptr.p_int[i], _state);
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s, _state);
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==1 )
    {

        /*
         * Already CRS
         */
        ae_frame_leave(_state);
        return;
    }
    if( s->matrixtype==2 )
    {
        ae_assert(s->m==s->n, "SparseConvertToCRS: non-square SKS matrices are not supported", _state);

        /*
         * From SKS to CRS.
         *
         * First, create local copy of the SKS matrix (Vals,
         * Idx, RIdx are stored; DIdx/UIdx for some time are
         * left in the SparseMatrix structure).
         */
        s->matrixtype = 1;
        ae_swap_vectors(&s->vals, &tvals);
        ae_swap_vectors(&s->idx, &tidx);
        ae_swap_vectors(&s->ridx, &tridx);

        /*
         * Fill RIdx by number of elements per row:
         * RIdx[I+1] stores number of elements in I-th row.
         *
         * Convert RIdx from row sizes to row offsets.
         * Set NInitialized
         */
        ivectorsetlengthatleast(&s->ridx, m+1, _state);
        s->ridx.ptr.p_int[0] = 0;
        for(i=1; i<=m; i++)
        {
            /* every row has at least its diagonal element */
            s->ridx.ptr.p_int[i] = 1;
        }
        nonne = 0;
        for(i=0; i<=m-1; i++)
        {
            /* row I additionally holds DIdx[I] subdiagonal elements; the
               UIdx[I] superdiagonal elements of column I belong to rows
               I-UIdx[I] .. I-1 and are credited to those rows */
            s->ridx.ptr.p_int[i+1] = s->didx.ptr.p_int[i]+s->ridx.ptr.p_int[i+1];
            for(j=i-s->uidx.ptr.p_int[i]; j<=i-1; j++)
            {
                s->ridx.ptr.p_int[j+1] = s->ridx.ptr.p_int[j+1]+1;
            }
            nonne = nonne+s->didx.ptr.p_int[i]+1+s->uidx.ptr.p_int[i];
        }
        /* prefix sum: row sizes -> row offsets */
        for(i=0; i<=s->m-1; i++)
        {
            s->ridx.ptr.p_int[i+1] = s->ridx.ptr.p_int[i+1]+s->ridx.ptr.p_int[i];
        }
        s->ninitialized = s->ridx.ptr.p_int[s->m];

        /*
         * Allocate memory and move elements to Vals/Idx.
         * Initially, elements are sorted by rows, and are sorted within row too.
         * No additional post-sorting is required.
         */
        ae_vector_set_length(&temp, m, _state);
        for(i=0; i<=m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }
        rvectorsetlengthatleast(&s->vals, nonne, _state);
        ivectorsetlengthatleast(&s->idx, nonne, _state);
        for(i=0; i<=m-1; i++)
        {

            /*
             * copy subdiagonal and diagonal parts of I-th block
             * (row I, columns I-DIdx[I] .. I, contiguous in both formats)
             */
            offs0 = tridx.ptr.p_int[i];
            offs1 = s->ridx.ptr.p_int[i]+temp.ptr.p_int[i];
            k = s->didx.ptr.p_int[i]+1;
            for(j=0; j<=k-1; j++)
            {
                s->vals.ptr.p_double[offs1+j] = tvals.ptr.p_double[offs0+j];
                s->idx.ptr.p_int[offs1+j] = i-s->didx.ptr.p_int[i]+j;
            }
            temp.ptr.p_int[i] = temp.ptr.p_int[i]+s->didx.ptr.p_int[i]+1;

            /*
             * Copy superdiagonal part of I-th block
             * (column I, rows I-UIdx[I] .. I-1 — scattered to their rows)
             */
            offs0 = tridx.ptr.p_int[i]+s->didx.ptr.p_int[i]+1;
            k = s->uidx.ptr.p_int[i];
            for(j=0; j<=k-1; j++)
            {
                offs1 = s->ridx.ptr.p_int[i-k+j]+temp.ptr.p_int[i-k+j];
                s->vals.ptr.p_double[offs1] = tvals.ptr.p_double[offs0+j];
                s->idx.ptr.p_int[offs1] = i;
                temp.ptr.p_int[i-k+j] = temp.ptr.p_int[i-k+j]+1;
            }
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s, _state);
        ae_frame_leave(_state);
        return;
    }
    /* unreachable for valid matrix types; defensive check */
    ae_assert(ae_false, "SparseConvertToCRS: invalid matrix type", _state);
    ae_frame_leave(_state);
}
29652 
29653 
29654 /*************************************************************************
29655 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
29656 copied to S1 and converted on-the-fly.
29657 
29658 INPUT PARAMETERS
29659     S0          -   sparse matrix in any format.
29660 
29661 OUTPUT PARAMETERS
29662     S1          -   sparse matrix in CRS format.
29663 
29664 NOTE: if S0 is stored as CRS, it is just copied without conversion.
29665 
29666 NOTE: this function de-allocates memory occupied by S1 before starting CRS
29667       conversion. If you perform a lot of repeated CRS conversions, it may
29668       lead to memory fragmentation. In this case we recommend you  to  use
29669       SparseCopyToCRSBuf() function which re-uses memory in S1 as much  as
29670       possible.
29671 
29672   -- ALGLIB PROJECT --
29673      Copyright 20.07.2012 by Bochkanov Sergey
29674 *************************************************************************/
sparsecopytocrs(sparsematrix * s0,sparsematrix * s1,ae_state * _state)29675 void sparsecopytocrs(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
29676 {
29677 
29678     _sparsematrix_clear(s1);
29679 
29680     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToCRS: invalid matrix type", _state);
29681     sparsecopytocrsbuf(s0, s1, _state);
29682 }
29683 
29684 
29685 /*************************************************************************
29686 This  function  performs  out-of-place  conversion  to  CRS format.  S0 is
29687 copied to S1 and converted on-the-fly. Memory allocated in S1 is reused to
29688 maximum extent possible.
29689 
29690 INPUT PARAMETERS
29691     S0          -   sparse matrix in any format.
29692     S1          -   matrix which may contain some pre-allocated memory, or
29693                     can be just uninitialized structure.
29694 
29695 OUTPUT PARAMETERS
29696     S1          -   sparse matrix in CRS format.
29697 
29698 NOTE: if S0 is stored as CRS, it is just copied without conversion.
29699 
29700   -- ALGLIB PROJECT --
29701      Copyright 20.07.2012 by Bochkanov Sergey
29702 *************************************************************************/
void sparsecopytocrsbuf(sparsematrix* s0,
     sparsematrix* s1,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector temp;   /* per-row insertion counters during the scatter pass */
    ae_int_t nonne;   /* total number of stored (non-empty) elements        */
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t offs0;
    ae_int_t offs1;
    ae_int_t m;

    ae_frame_make(_state, &_frame_block);
    memset(&temp, 0, sizeof(temp));
    ae_vector_init(&temp, 0, DT_INT, _state, ae_true);

    ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToCRSBuf: invalid matrix type", _state);
    m = s0->m;
    if( s0->matrixtype==0 )
    {

        /*
         * Convert from hash-table to CRS
         * Done like ConvertToCRS function
         */
        s1->matrixtype = 1;
        s1->m = s0->m;
        s1->n = s0->n;
        s1->nfree = s0->nfree;
        nonne = 0;
        k = s0->tablesize;
        ivectorsetlengthatleast(&s1->ridx, s1->m+1, _state);
        for(i=0; i<=s1->m; i++)
        {
            s1->ridx.ptr.p_int[i] = 0;
        }
        ae_vector_set_length(&temp, s1->m, _state);
        for(i=0; i<=s1->m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }

        /*
         * Number of elements per row
         * (hash slot I is occupied iff its row index Idx[2*I] is >=0;
         *  Idx[2*I+1] is the column index)
         */
        for(i=0; i<=k-1; i++)
        {
            if( s0->idx.ptr.p_int[2*i]>=0 )
            {
                s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]+1] = s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]+1]+1;
                nonne = nonne+1;
            }
        }

        /*
         * Fill RIdx (offsets of rows) via prefix sum of row sizes
         */
        for(i=0; i<=s1->m-1; i++)
        {
            s1->ridx.ptr.p_int[i+1] = s1->ridx.ptr.p_int[i+1]+s1->ridx.ptr.p_int[i];
        }

        /*
         * Allocate memory, then scatter each occupied hash slot into
         * position RIdx[row]+Temp[row], advancing Temp[row] afterwards
         */
        rvectorsetlengthatleast(&s1->vals, nonne, _state);
        ivectorsetlengthatleast(&s1->idx, nonne, _state);
        for(i=0; i<=k-1; i++)
        {
            if( s0->idx.ptr.p_int[2*i]>=0 )
            {
                s1->vals.ptr.p_double[s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]]+temp.ptr.p_int[s0->idx.ptr.p_int[2*i]]] = s0->vals.ptr.p_double[i];
                s1->idx.ptr.p_int[s1->ridx.ptr.p_int[s0->idx.ptr.p_int[2*i]]+temp.ptr.p_int[s0->idx.ptr.p_int[2*i]]] = s0->idx.ptr.p_int[2*i+1];
                temp.ptr.p_int[s0->idx.ptr.p_int[2*i]] = temp.ptr.p_int[s0->idx.ptr.p_int[2*i]]+1;
            }
        }

        /*
         * Set NInitialized
         */
        s1->ninitialized = s1->ridx.ptr.p_int[s1->m];

        /*
         * Sorting of elements (each row sorted by column index,
         * with Vals kept in sync)
         */
        for(i=0; i<=s1->m-1; i++)
        {
            tagsortmiddleir(&s1->idx, &s1->vals, s1->ridx.ptr.p_int[i], s1->ridx.ptr.p_int[i+1]-s1->ridx.ptr.p_int[i], _state);
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s1, _state);
        ae_frame_leave(_state);
        return;
    }
    if( s0->matrixtype==1 )
    {

        /*
         * Already CRS, just copy
         */
        sparsecopybuf(s0, s1, _state);
        ae_frame_leave(_state);
        return;
    }
    if( s0->matrixtype==2 )
    {
        ae_assert(s0->m==s0->n, "SparseCopyToCRS: non-square SKS matrices are not supported", _state);

        /*
         * From SKS to CRS.
         */
        s1->m = s0->m;
        s1->n = s0->n;
        s1->matrixtype = 1;

        /*
         * Fill RIdx by number of elements per row:
         * RIdx[I+1] stores number of elements in I-th row.
         *
         * Convert RIdx from row sizes to row offsets.
         * Set NInitialized
         */
        ivectorsetlengthatleast(&s1->ridx, m+1, _state);
        s1->ridx.ptr.p_int[0] = 0;
        for(i=1; i<=m; i++)
        {
            /* every row has at least its diagonal element */
            s1->ridx.ptr.p_int[i] = 1;
        }
        nonne = 0;
        for(i=0; i<=m-1; i++)
        {
            /* row I also holds DIdx[I] subdiagonal elements; the UIdx[I]
               superdiagonal elements of column I are credited to rows
               I-UIdx[I] .. I-1 */
            s1->ridx.ptr.p_int[i+1] = s0->didx.ptr.p_int[i]+s1->ridx.ptr.p_int[i+1];
            for(j=i-s0->uidx.ptr.p_int[i]; j<=i-1; j++)
            {
                s1->ridx.ptr.p_int[j+1] = s1->ridx.ptr.p_int[j+1]+1;
            }
            nonne = nonne+s0->didx.ptr.p_int[i]+1+s0->uidx.ptr.p_int[i];
        }
        /* prefix sum: row sizes -> row offsets */
        for(i=0; i<=m-1; i++)
        {
            s1->ridx.ptr.p_int[i+1] = s1->ridx.ptr.p_int[i+1]+s1->ridx.ptr.p_int[i];
        }
        s1->ninitialized = s1->ridx.ptr.p_int[m];

        /*
         * Allocate memory and move elements to Vals/Idx.
         * Initially, elements are sorted by rows, and are sorted within row too.
         * No additional post-sorting is required.
         */
        ae_vector_set_length(&temp, m, _state);
        for(i=0; i<=m-1; i++)
        {
            temp.ptr.p_int[i] = 0;
        }
        rvectorsetlengthatleast(&s1->vals, nonne, _state);
        ivectorsetlengthatleast(&s1->idx, nonne, _state);
        for(i=0; i<=m-1; i++)
        {

            /*
             * copy subdiagonal and diagonal parts of I-th block
             * (row I, columns I-DIdx[I] .. I, contiguous in both formats)
             */
            offs0 = s0->ridx.ptr.p_int[i];
            offs1 = s1->ridx.ptr.p_int[i]+temp.ptr.p_int[i];
            k = s0->didx.ptr.p_int[i]+1;
            for(j=0; j<=k-1; j++)
            {
                s1->vals.ptr.p_double[offs1+j] = s0->vals.ptr.p_double[offs0+j];
                s1->idx.ptr.p_int[offs1+j] = i-s0->didx.ptr.p_int[i]+j;
            }
            temp.ptr.p_int[i] = temp.ptr.p_int[i]+s0->didx.ptr.p_int[i]+1;

            /*
             * Copy superdiagonal part of I-th block
             * (column I, rows I-UIdx[I] .. I-1 — scattered to their rows)
             */
            offs0 = s0->ridx.ptr.p_int[i]+s0->didx.ptr.p_int[i]+1;
            k = s0->uidx.ptr.p_int[i];
            for(j=0; j<=k-1; j++)
            {
                offs1 = s1->ridx.ptr.p_int[i-k+j]+temp.ptr.p_int[i-k+j];
                s1->vals.ptr.p_double[offs1] = s0->vals.ptr.p_double[offs0+j];
                s1->idx.ptr.p_int[offs1] = i;
                temp.ptr.p_int[i-k+j] = temp.ptr.p_int[i-k+j]+1;
            }
        }

        /*
         * Initialization 'S.UIdx' and 'S.DIdx'
         */
        sparseinitduidx(s1, _state);
        ae_frame_leave(_state);
        return;
    }
    /* unreachable for valid matrix types; defensive check */
    ae_assert(ae_false, "SparseCopyToCRSBuf: unexpected matrix type", _state);
    ae_frame_leave(_state);
}
29904 
29905 
29906 /*************************************************************************
29907 This function performs in-place conversion to SKS format.
29908 
29909 INPUT PARAMETERS
29910     S           -   sparse matrix in any format.
29911 
29912 OUTPUT PARAMETERS
29913     S           -   sparse matrix in SKS format.
29914 
29915 NOTE: this  function  has   no  effect  when  called with matrix which  is
29916       already in SKS mode.
29917 
29918 NOTE: in-place conversion involves allocation of temporary arrays. If  you
29919       perform a lot of repeated in- place  conversions,  it  may  lead  to
29920       memory fragmentation. Consider using out-of-place SparseCopyToSKSBuf()
29921       function in this case.
29922 
29923   -- ALGLIB PROJECT --
29924      Copyright 15.01.2014 by Bochkanov Sergey
29925 *************************************************************************/
void sparseconverttosks(sparsematrix* s, ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector tridx;  /* SKS row offsets being built                         */
    ae_vector tdidx;  /* subdiagonal band width per row; [N] = max over rows */
    ae_vector tuidx;  /* superdiagonal band width per column; [N] = max      */
    ae_vector tvals;  /* SKS value storage being built                       */
    ae_int_t n;
    ae_int_t t0;
    ae_int_t t1;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;

    ae_frame_make(_state, &_frame_block);
    memset(&tridx, 0, sizeof(tridx));
    memset(&tdidx, 0, sizeof(tdidx));
    memset(&tuidx, 0, sizeof(tuidx));
    memset(&tvals, 0, sizeof(tvals));
    ae_vector_init(&tridx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tdidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tuidx, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tvals, 0, DT_REAL, _state, ae_true);

    ae_assert((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2, "SparseConvertToSKS: invalid matrix type", _state);
    ae_assert(s->m==s->n, "SparseConvertToSKS: rectangular matrices are not supported", _state);
    n = s->n;
    if( s->matrixtype==2 )
    {

        /*
         * Already in SKS mode
         */
        ae_frame_leave(_state);
        return;
    }

    /*
     * Generate internal copy of SKS matrix.
     *
     * Pass 1: enumerate all elements to determine, for every row I, the
     * subdiagonal band width DIdx[I] (distance to the leftmost element in
     * the row) and, for every column J, the superdiagonal band width
     * UIdx[J] (distance to the topmost element in the column).
     */
    ivectorsetlengthatleast(&tdidx, n+1, _state);
    ivectorsetlengthatleast(&tuidx, n+1, _state);
    for(i=0; i<=n; i++)
    {
        tdidx.ptr.p_int[i] = 0;
        tuidx.ptr.p_int[i] = 0;
    }
    t0 = 0;
    t1 = 0;
    while(sparseenumerate(s, &t0, &t1, &i, &j, &v, _state))
    {
        if( j<i )
        {
            tdidx.ptr.p_int[i] = ae_maxint(tdidx.ptr.p_int[i], i-j, _state);
        }
        else
        {
            tuidx.ptr.p_int[j] = ae_maxint(tuidx.ptr.p_int[j], j-i, _state);
        }
    }
    /* RIdx[I] = offset of I-th block; each block stores DIdx[I] subdiagonal
       elements, the diagonal element, and UIdx[I] superdiagonal elements */
    ivectorsetlengthatleast(&tridx, n+1, _state);
    tridx.ptr.p_int[0] = 0;
    for(i=1; i<=n; i++)
    {
        tridx.ptr.p_int[i] = tridx.ptr.p_int[i-1]+tdidx.ptr.p_int[i-1]+1+tuidx.ptr.p_int[i-1];
    }
    /* zero-fill storage: elements inside the band but absent from the
       source matrix must read as 0 */
    rvectorsetlengthatleast(&tvals, tridx.ptr.p_int[n], _state);
    k = tridx.ptr.p_int[n];
    for(i=0; i<=k-1; i++)
    {
        tvals.ptr.p_double[i] = 0.0;
    }
    /*
     * Pass 2: enumerate again and place each element at its SKS position:
     * subdiagonal/diagonal elements go into row blocks, superdiagonal
     * elements into the tail of their column's block.
     */
    t0 = 0;
    t1 = 0;
    while(sparseenumerate(s, &t0, &t1, &i, &j, &v, _state))
    {
        if( j<=i )
        {
            tvals.ptr.p_double[tridx.ptr.p_int[i]+tdidx.ptr.p_int[i]-(i-j)] = v;
        }
        else
        {
            tvals.ptr.p_double[tridx.ptr.p_int[j+1]-(j-i)] = v;
        }
    }
    /* DIdx[N]/UIdx[N] hold the maximum band widths over all rows/columns */
    for(i=0; i<=n-1; i++)
    {
        tdidx.ptr.p_int[n] = ae_maxint(tdidx.ptr.p_int[n], tdidx.ptr.p_int[i], _state);
        tuidx.ptr.p_int[n] = ae_maxint(tuidx.ptr.p_int[n], tuidx.ptr.p_int[i], _state);
    }
    /* commit: switch the matrix to SKS mode and swap in the new arrays */
    s->matrixtype = 2;
    s->ninitialized = 0;
    s->nfree = 0;
    s->m = n;
    s->n = n;
    ae_swap_vectors(&s->didx, &tdidx);
    ae_swap_vectors(&s->uidx, &tuidx);
    ae_swap_vectors(&s->ridx, &tridx);
    ae_swap_vectors(&s->vals, &tvals);
    ae_frame_leave(_state);
}
30028 
30029 
30030 /*************************************************************************
30031 This  function  performs  out-of-place  conversion  to SKS storage format.
30032 S0 is copied to S1 and converted on-the-fly.
30033 
30034 INPUT PARAMETERS
30035     S0          -   sparse matrix in any format.
30036 
30037 OUTPUT PARAMETERS
30038     S1          -   sparse matrix in SKS format.
30039 
30040 NOTE: if S0 is stored as SKS, it is just copied without conversion.
30041 
30042 NOTE: this function de-allocates memory  occupied  by  S1 before  starting
30043       conversion. If you perform a  lot  of  repeated  conversions, it may
30044       lead to memory fragmentation. In this case we recommend you  to  use
30045       SparseCopyToSKSBuf() function which re-uses memory in S1 as much  as
30046       possible.
30047 
30048   -- ALGLIB PROJECT --
30049      Copyright 20.07.2012 by Bochkanov Sergey
30050 *************************************************************************/
sparsecopytosks(sparsematrix * s0,sparsematrix * s1,ae_state * _state)30051 void sparsecopytosks(sparsematrix* s0, sparsematrix* s1, ae_state *_state)
30052 {
30053 
30054     _sparsematrix_clear(s1);
30055 
30056     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToSKS: invalid matrix type", _state);
30057     sparsecopytosksbuf(s0, s1, _state);
30058 }
30059 
30060 
30061 /*************************************************************************
30062 This  function  performs  out-of-place  conversion  to SKS format.  S0  is
30063 copied to S1 and converted on-the-fly. Memory  allocated  in S1 is  reused
30064 to maximum extent possible.
30065 
30066 INPUT PARAMETERS
30067     S0          -   sparse matrix in any format.
30068 
30069 OUTPUT PARAMETERS
30070     S1          -   sparse matrix in SKS format.
30071 
30072 NOTE: if S0 is stored as SKS, it is just copied without conversion.
30073 
30074   -- ALGLIB PROJECT --
30075      Copyright 20.07.2012 by Bochkanov Sergey
30076 *************************************************************************/
sparsecopytosksbuf(sparsematrix * s0,sparsematrix * s1,ae_state * _state)30077 void sparsecopytosksbuf(sparsematrix* s0,
30078      sparsematrix* s1,
30079      ae_state *_state)
30080 {
30081     double v;
30082     ae_int_t n;
30083     ae_int_t t0;
30084     ae_int_t t1;
30085     ae_int_t i;
30086     ae_int_t j;
30087     ae_int_t k;
30088 
30089 
30090     ae_assert((s0->matrixtype==0||s0->matrixtype==1)||s0->matrixtype==2, "SparseCopyToSKSBuf: invalid matrix type", _state);
30091     ae_assert(s0->m==s0->n, "SparseCopyToSKSBuf: rectangular matrices are not supported", _state);
30092     n = s0->n;
30093     if( s0->matrixtype==2 )
30094     {
30095 
30096         /*
30097          * Already SKS, just copy
30098          */
30099         sparsecopybuf(s0, s1, _state);
30100         return;
30101     }
30102 
30103     /*
30104      * Generate copy of matrix in the SKS format
30105      */
30106     ivectorsetlengthatleast(&s1->didx, n+1, _state);
30107     ivectorsetlengthatleast(&s1->uidx, n+1, _state);
30108     for(i=0; i<=n; i++)
30109     {
30110         s1->didx.ptr.p_int[i] = 0;
30111         s1->uidx.ptr.p_int[i] = 0;
30112     }
30113     t0 = 0;
30114     t1 = 0;
30115     while(sparseenumerate(s0, &t0, &t1, &i, &j, &v, _state))
30116     {
30117         if( j<i )
30118         {
30119             s1->didx.ptr.p_int[i] = ae_maxint(s1->didx.ptr.p_int[i], i-j, _state);
30120         }
30121         else
30122         {
30123             s1->uidx.ptr.p_int[j] = ae_maxint(s1->uidx.ptr.p_int[j], j-i, _state);
30124         }
30125     }
30126     ivectorsetlengthatleast(&s1->ridx, n+1, _state);
30127     s1->ridx.ptr.p_int[0] = 0;
30128     for(i=1; i<=n; i++)
30129     {
30130         s1->ridx.ptr.p_int[i] = s1->ridx.ptr.p_int[i-1]+s1->didx.ptr.p_int[i-1]+1+s1->uidx.ptr.p_int[i-1];
30131     }
30132     rvectorsetlengthatleast(&s1->vals, s1->ridx.ptr.p_int[n], _state);
30133     k = s1->ridx.ptr.p_int[n];
30134     for(i=0; i<=k-1; i++)
30135     {
30136         s1->vals.ptr.p_double[i] = 0.0;
30137     }
30138     t0 = 0;
30139     t1 = 0;
30140     while(sparseenumerate(s0, &t0, &t1, &i, &j, &v, _state))
30141     {
30142         if( j<=i )
30143         {
30144             s1->vals.ptr.p_double[s1->ridx.ptr.p_int[i]+s1->didx.ptr.p_int[i]-(i-j)] = v;
30145         }
30146         else
30147         {
30148             s1->vals.ptr.p_double[s1->ridx.ptr.p_int[j+1]-(j-i)] = v;
30149         }
30150     }
30151     for(i=0; i<=n-1; i++)
30152     {
30153         s1->didx.ptr.p_int[n] = ae_maxint(s1->didx.ptr.p_int[n], s1->didx.ptr.p_int[i], _state);
30154         s1->uidx.ptr.p_int[n] = ae_maxint(s1->uidx.ptr.p_int[n], s1->uidx.ptr.p_int[i], _state);
30155     }
30156     s1->matrixtype = 2;
30157     s1->ninitialized = 0;
30158     s1->nfree = 0;
30159     s1->m = n;
30160     s1->n = n;
30161 }
30162 
30163 
30164 /*************************************************************************
30165 This non-accessible to user function performs  in-place  creation  of  CRS
30166 matrix. It is expected that:
30167 * S.M and S.N are initialized
30168 * S.RIdx, S.Idx and S.Vals are loaded with values in CRS  format  used  by
30169   ALGLIB, with elements of S.Idx/S.Vals  possibly  being  unsorted  within
30170   each row (this constructor function may post-sort matrix,  assuming that
30171   it is sorted by rows).
30172 
30173 Only 5 fields should be set by caller. Other fields will be  rewritten  by
30174 this constructor function.
30175 
30176 This function performs integrity check on user-specified values, with  the
30177 only exception being Vals[] array:
30178 * it does not require values to be non-zero
30179 * it does not check for elements of Vals[] being finite IEEE-754 values
30180 
30181 INPUT PARAMETERS
30182     S   -   sparse matrix with corresponding fields set by caller
30183 
30184 OUTPUT PARAMETERS
30185     S   -   sparse matrix in CRS format.
30186 
30187   -- ALGLIB PROJECT --
30188      Copyright 20.08.2016 by Bochkanov Sergey
30189 *************************************************************************/
sparsecreatecrsinplace(sparsematrix * s,ae_state * _state)30190 void sparsecreatecrsinplace(sparsematrix* s, ae_state *_state)
30191 {
30192     ae_int_t m;
30193     ae_int_t n;
30194     ae_int_t i;
30195     ae_int_t j;
30196     ae_int_t j0;
30197     ae_int_t j1;
30198 
30199 
30200     m = s->m;
30201     n = s->n;
30202 
30203     /*
30204      * Quick exit for M=0 or N=0
30205      */
30206     ae_assert(s->m>=0, "SparseCreateCRSInplace: integrity check failed", _state);
30207     ae_assert(s->n>=0, "SparseCreateCRSInplace: integrity check failed", _state);
30208     if( m==0||n==0 )
30209     {
30210         s->matrixtype = 1;
30211         s->ninitialized = 0;
30212         ivectorsetlengthatleast(&s->ridx, s->m+1, _state);
30213         ivectorsetlengthatleast(&s->didx, s->m, _state);
30214         ivectorsetlengthatleast(&s->uidx, s->m, _state);
30215         for(i=0; i<=s->m-1; i++)
30216         {
30217             s->ridx.ptr.p_int[i] = 0;
30218             s->uidx.ptr.p_int[i] = 0;
30219             s->didx.ptr.p_int[i] = 0;
30220         }
30221         s->ridx.ptr.p_int[s->m] = 0;
30222         return;
30223     }
30224 
30225     /*
30226      * Perform integrity check
30227      */
30228     ae_assert(s->m>0, "SparseCreateCRSInplace: integrity check failed", _state);
30229     ae_assert(s->n>0, "SparseCreateCRSInplace: integrity check failed", _state);
30230     ae_assert(s->ridx.cnt>=m+1, "SparseCreateCRSInplace: integrity check failed", _state);
30231     for(i=0; i<=m-1; i++)
30232     {
30233         ae_assert(s->ridx.ptr.p_int[i]>=0&&s->ridx.ptr.p_int[i]<=s->ridx.ptr.p_int[i+1], "SparseCreateCRSInplace: integrity check failed", _state);
30234     }
30235     ae_assert(s->ridx.ptr.p_int[m]<=s->idx.cnt, "SparseCreateCRSInplace: integrity check failed", _state);
30236     ae_assert(s->ridx.ptr.p_int[m]<=s->vals.cnt, "SparseCreateCRSInplace: integrity check failed", _state);
30237     for(i=0; i<=m-1; i++)
30238     {
30239         j0 = s->ridx.ptr.p_int[i];
30240         j1 = s->ridx.ptr.p_int[i+1]-1;
30241         for(j=j0; j<=j1; j++)
30242         {
30243             ae_assert(s->idx.ptr.p_int[j]>=0&&s->idx.ptr.p_int[j]<n, "SparseCreateCRSInplace: integrity check failed", _state);
30244         }
30245     }
30246 
30247     /*
30248      * Initialize
30249      */
30250     s->matrixtype = 1;
30251     s->ninitialized = s->ridx.ptr.p_int[m];
30252     for(i=0; i<=m-1; i++)
30253     {
30254         tagsortmiddleir(&s->idx, &s->vals, s->ridx.ptr.p_int[i], s->ridx.ptr.p_int[i+1]-s->ridx.ptr.p_int[i], _state);
30255     }
30256     sparseinitduidx(s, _state);
30257 }
30258 
30259 
30260 /*************************************************************************
30261 This function returns type of the matrix storage format.
30262 
30263 INPUT PARAMETERS:
30264     S           -   sparse matrix.
30265 
30266 RESULT:
30267     sparse storage format used by matrix:
30268         0   -   Hash-table
30269         1   -   CRS (compressed row storage)
30270         2   -   SKS (skyline)
30271 
30272 NOTE: future  versions  of  ALGLIB  may  include additional sparse storage
30273       formats.
30274 
30275 
30276   -- ALGLIB PROJECT --
30277      Copyright 20.07.2012 by Bochkanov Sergey
30278 *************************************************************************/
sparsegetmatrixtype(sparsematrix * s,ae_state * _state)30279 ae_int_t sparsegetmatrixtype(sparsematrix* s, ae_state *_state)
30280 {
30281     ae_int_t result;
30282 
30283 
30284     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseGetMatrixType: invalid matrix type", _state);
30285     result = s->matrixtype;
30286     return result;
30287 }
30288 
30289 
30290 /*************************************************************************
30291 This function checks matrix storage format and returns True when matrix is
30292 stored using Hash table representation.
30293 
30294 INPUT PARAMETERS:
30295     S   -   sparse matrix.
30296 
30297 RESULT:
30298     True if matrix type is Hash table
30299     False if matrix type is not Hash table
30300 
30301   -- ALGLIB PROJECT --
30302      Copyright 20.07.2012 by Bochkanov Sergey
30303 *************************************************************************/
sparseishash(sparsematrix * s,ae_state * _state)30304 ae_bool sparseishash(sparsematrix* s, ae_state *_state)
30305 {
30306     ae_bool result;
30307 
30308 
30309     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseIsHash: invalid matrix type", _state);
30310     result = s->matrixtype==0;
30311     return result;
30312 }
30313 
30314 
30315 /*************************************************************************
30316 This function checks matrix storage format and returns True when matrix is
30317 stored using CRS representation.
30318 
30319 INPUT PARAMETERS:
30320     S   -   sparse matrix.
30321 
30322 RESULT:
30323     True if matrix type is CRS
30324     False if matrix type is not CRS
30325 
30326   -- ALGLIB PROJECT --
30327      Copyright 20.07.2012 by Bochkanov Sergey
30328 *************************************************************************/
sparseiscrs(sparsematrix * s,ae_state * _state)30329 ae_bool sparseiscrs(sparsematrix* s, ae_state *_state)
30330 {
30331     ae_bool result;
30332 
30333 
30334     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseIsCRS: invalid matrix type", _state);
30335     result = s->matrixtype==1;
30336     return result;
30337 }
30338 
30339 
30340 /*************************************************************************
30341 This function checks matrix storage format and returns True when matrix is
30342 stored using SKS representation.
30343 
30344 INPUT PARAMETERS:
30345     S   -   sparse matrix.
30346 
30347 RESULT:
30348     True if matrix type is SKS
30349     False if matrix type is not SKS
30350 
30351   -- ALGLIB PROJECT --
30352      Copyright 20.07.2012 by Bochkanov Sergey
30353 *************************************************************************/
sparseissks(sparsematrix * s,ae_state * _state)30354 ae_bool sparseissks(sparsematrix* s, ae_state *_state)
30355 {
30356     ae_bool result;
30357 
30358 
30359     ae_assert((((s->matrixtype==0||s->matrixtype==1)||s->matrixtype==2)||s->matrixtype==-10081)||s->matrixtype==-10082, "SparseIsSKS: invalid matrix type", _state);
30360     result = s->matrixtype==2;
30361     return result;
30362 }
30363 
30364 
30365 /*************************************************************************
30366 The function frees all memory occupied by  sparse  matrix.  Sparse  matrix
30367 structure becomes unusable after this call.
30368 
30369 OUTPUT PARAMETERS
30370     S   -   sparse matrix to delete
30371 
30372   -- ALGLIB PROJECT --
30373      Copyright 24.07.2012 by Bochkanov Sergey
30374 *************************************************************************/
sparsefree(sparsematrix * s,ae_state * _state)30375 void sparsefree(sparsematrix* s, ae_state *_state)
30376 {
30377 
30378     _sparsematrix_clear(s);
30379 
30380     s->matrixtype = -1;
30381     s->m = 0;
30382     s->n = 0;
30383     s->nfree = 0;
30384     s->ninitialized = 0;
30385     s->tablesize = 0;
30386 }
30387 
30388 
30389 /*************************************************************************
30390 The function returns number of rows of a sparse matrix.
30391 
30392 RESULT: number of rows of a sparse matrix.
30393 
30394   -- ALGLIB PROJECT --
30395      Copyright 23.08.2012 by Bochkanov Sergey
30396 *************************************************************************/
sparsegetnrows(sparsematrix * s,ae_state * _state)30397 ae_int_t sparsegetnrows(sparsematrix* s, ae_state *_state)
30398 {
30399     ae_int_t result;
30400 
30401 
30402     result = s->m;
30403     return result;
30404 }
30405 
30406 
30407 /*************************************************************************
30408 The function returns number of columns of a sparse matrix.
30409 
30410 RESULT: number of columns of a sparse matrix.
30411 
30412   -- ALGLIB PROJECT --
30413      Copyright 23.08.2012 by Bochkanov Sergey
30414 *************************************************************************/
sparsegetncols(sparsematrix * s,ae_state * _state)30415 ae_int_t sparsegetncols(sparsematrix* s, ae_state *_state)
30416 {
30417     ae_int_t result;
30418 
30419 
30420     result = s->n;
30421     return result;
30422 }
30423 
30424 
30425 /*************************************************************************
30426 The function returns number of strictly upper triangular non-zero elements
30427 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
30428 in the sparse matrix data structure. If some element  has  zero  numerical
30429 value, it is still counted.
30430 
30431 This function has different cost for different types of matrices:
30432 * for hash-based matrices it involves complete pass over entire hash-table
30433   with O(NNZ) cost, where NNZ is number of non-zero elements
30434 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
30435 
30436 RESULT: number of non-zero elements strictly above main diagonal
30437 
30438   -- ALGLIB PROJECT --
30439      Copyright 12.02.2014 by Bochkanov Sergey
30440 *************************************************************************/
sparsegetuppercount(sparsematrix * s,ae_state * _state)30441 ae_int_t sparsegetuppercount(sparsematrix* s, ae_state *_state)
30442 {
30443     ae_int_t sz;
30444     ae_int_t i0;
30445     ae_int_t i;
30446     ae_int_t result;
30447 
30448 
30449     result = -1;
30450     if( s->matrixtype==0 )
30451     {
30452 
30453         /*
30454          * Hash-table matrix
30455          */
30456         result = 0;
30457         sz = s->tablesize;
30458         for(i0=0; i0<=sz-1; i0++)
30459         {
30460             i = s->idx.ptr.p_int[2*i0];
30461             if( i>=0&&s->idx.ptr.p_int[2*i0+1]>i )
30462             {
30463                 result = result+1;
30464             }
30465         }
30466         return result;
30467     }
30468     if( s->matrixtype==1 )
30469     {
30470 
30471         /*
30472          * CRS matrix
30473          */
30474         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGetUpperCount: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
30475         result = 0;
30476         sz = s->m;
30477         for(i=0; i<=sz-1; i++)
30478         {
30479             result = result+(s->ridx.ptr.p_int[i+1]-s->uidx.ptr.p_int[i]);
30480         }
30481         return result;
30482     }
30483     if( s->matrixtype==2 )
30484     {
30485 
30486         /*
30487          * SKS matrix
30488          */
30489         ae_assert(s->m==s->n, "SparseGetUpperCount: non-square SKS matrices are not supported", _state);
30490         result = 0;
30491         sz = s->m;
30492         for(i=0; i<=sz-1; i++)
30493         {
30494             result = result+s->uidx.ptr.p_int[i];
30495         }
30496         return result;
30497     }
30498     ae_assert(ae_false, "SparseGetUpperCount: internal error", _state);
30499     return result;
30500 }
30501 
30502 
30503 /*************************************************************************
30504 The function returns number of strictly lower triangular non-zero elements
30505 in  the  matrix.  It  counts  SYMBOLICALLY non-zero elements, i.e. entries
30506 in the sparse matrix data structure. If some element  has  zero  numerical
30507 value, it is still counted.
30508 
30509 This function has different cost for different types of matrices:
30510 * for hash-based matrices it involves complete pass over entire hash-table
30511   with O(NNZ) cost, where NNZ is number of non-zero elements
30512 * for CRS and SKS matrix types cost of counting is O(N) (N - matrix size).
30513 
30514 RESULT: number of non-zero elements strictly below main diagonal
30515 
30516   -- ALGLIB PROJECT --
30517      Copyright 12.02.2014 by Bochkanov Sergey
30518 *************************************************************************/
sparsegetlowercount(sparsematrix * s,ae_state * _state)30519 ae_int_t sparsegetlowercount(sparsematrix* s, ae_state *_state)
30520 {
30521     ae_int_t sz;
30522     ae_int_t i0;
30523     ae_int_t i;
30524     ae_int_t result;
30525 
30526 
30527     result = -1;
30528     if( s->matrixtype==0 )
30529     {
30530 
30531         /*
30532          * Hash-table matrix
30533          */
30534         result = 0;
30535         sz = s->tablesize;
30536         for(i0=0; i0<=sz-1; i0++)
30537         {
30538             i = s->idx.ptr.p_int[2*i0];
30539             if( i>=0&&s->idx.ptr.p_int[2*i0+1]<i )
30540             {
30541                 result = result+1;
30542             }
30543         }
30544         return result;
30545     }
30546     if( s->matrixtype==1 )
30547     {
30548 
30549         /*
30550          * CRS matrix
30551          */
30552         ae_assert(s->ninitialized==s->ridx.ptr.p_int[s->m], "SparseGetUpperCount: some rows/elements of the CRS matrix were not initialized (you must initialize everything you promised to SparseCreateCRS)", _state);
30553         result = 0;
30554         sz = s->m;
30555         for(i=0; i<=sz-1; i++)
30556         {
30557             result = result+(s->didx.ptr.p_int[i]-s->ridx.ptr.p_int[i]);
30558         }
30559         return result;
30560     }
30561     if( s->matrixtype==2 )
30562     {
30563 
30564         /*
30565          * SKS matrix
30566          */
30567         ae_assert(s->m==s->n, "SparseGetUpperCount: non-square SKS matrices are not supported", _state);
30568         result = 0;
30569         sz = s->m;
30570         for(i=0; i<=sz-1; i++)
30571         {
30572             result = result+s->didx.ptr.p_int[i];
30573         }
30574         return result;
30575     }
30576     ae_assert(ae_false, "SparseGetUpperCount: internal error", _state);
30577     return result;
30578 }
30579 
30580 
30581 /*************************************************************************
30582 Serializer: allocation.
30583 
30584 INTERNAL-ONLY FUNCTION, SUPPORTS ONLY CRS MATRICES
30585 
30586   -- ALGLIB --
30587      Copyright 20.07.2021 by Bochkanov Sergey
30588 *************************************************************************/
sparsealloc(ae_serializer * s,sparsematrix * a,ae_state * _state)30589 void sparsealloc(ae_serializer* s, sparsematrix* a, ae_state *_state)
30590 {
30591     ae_int_t i;
30592     ae_int_t nused;
30593 
30594 
30595     ae_assert((a->matrixtype==0||a->matrixtype==1)||a->matrixtype==2, "SparseAlloc: only CRS/SKS matrices are supported", _state);
30596 
30597     /*
30598      * Header
30599      */
30600     ae_serializer_alloc_entry(s);
30601     ae_serializer_alloc_entry(s);
30602     ae_serializer_alloc_entry(s);
30603 
30604     /*
30605      * Alloc other parameters
30606      */
30607     if( a->matrixtype==0 )
30608     {
30609 
30610         /*
30611          * Alloc Hash
30612          */
30613         nused = 0;
30614         for(i=0; i<=a->tablesize-1; i++)
30615         {
30616             if( a->idx.ptr.p_int[2*i+0]>=0 )
30617             {
30618                 nused = nused+1;
30619             }
30620         }
30621         ae_serializer_alloc_entry(s);
30622         ae_serializer_alloc_entry(s);
30623         ae_serializer_alloc_entry(s);
30624         for(i=0; i<=a->tablesize-1; i++)
30625         {
30626             if( a->idx.ptr.p_int[2*i+0]>=0 )
30627             {
30628                 ae_serializer_alloc_entry(s);
30629                 ae_serializer_alloc_entry(s);
30630                 ae_serializer_alloc_entry(s);
30631             }
30632         }
30633     }
30634     if( a->matrixtype==1 )
30635     {
30636 
30637         /*
30638          * Alloc CRS
30639          */
30640         ae_serializer_alloc_entry(s);
30641         ae_serializer_alloc_entry(s);
30642         ae_serializer_alloc_entry(s);
30643         allocintegerarray(s, &a->ridx, a->m+1, _state);
30644         allocintegerarray(s, &a->idx, a->ridx.ptr.p_int[a->m], _state);
30645         allocrealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30646     }
30647     if( a->matrixtype==2 )
30648     {
30649 
30650         /*
30651          * Alloc SKS
30652          */
30653         ae_assert(a->m==a->n, "SparseAlloc: rectangular SKS serialization is not supported", _state);
30654         ae_serializer_alloc_entry(s);
30655         ae_serializer_alloc_entry(s);
30656         allocintegerarray(s, &a->ridx, a->m+1, _state);
30657         allocintegerarray(s, &a->didx, a->n+1, _state);
30658         allocintegerarray(s, &a->uidx, a->n+1, _state);
30659         allocrealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30660     }
30661 
30662     /*
30663      * End of stream
30664      */
30665     ae_serializer_alloc_entry(s);
30666 }
30667 
30668 
30669 /*************************************************************************
30670 Serializer: serialization
30671 
30672 INTERNAL-ONLY FUNCTION, SUPPORTS ONLY CRS MATRICES
30673 
30674   -- ALGLIB --
30675      Copyright 20.07.2021 by Bochkanov Sergey
30676 *************************************************************************/
sparseserialize(ae_serializer * s,sparsematrix * a,ae_state * _state)30677 void sparseserialize(ae_serializer* s, sparsematrix* a, ae_state *_state)
30678 {
30679     ae_int_t i;
30680     ae_int_t nused;
30681 
30682 
30683     ae_assert((a->matrixtype==0||a->matrixtype==1)||a->matrixtype==2, "SparseSerialize: only CRS/SKS matrices are supported", _state);
30684 
30685     /*
30686      * Header
30687      */
30688     ae_serializer_serialize_int(s, getsparsematrixserializationcode(_state), _state);
30689     ae_serializer_serialize_int(s, a->matrixtype, _state);
30690     ae_serializer_serialize_int(s, 0, _state);
30691 
30692     /*
30693      * Serialize other parameters
30694      */
30695     if( a->matrixtype==0 )
30696     {
30697 
30698         /*
30699          * Serialize Hash
30700          */
30701         nused = 0;
30702         for(i=0; i<=a->tablesize-1; i++)
30703         {
30704             if( a->idx.ptr.p_int[2*i+0]>=0 )
30705             {
30706                 nused = nused+1;
30707             }
30708         }
30709         ae_serializer_serialize_int(s, a->m, _state);
30710         ae_serializer_serialize_int(s, a->n, _state);
30711         ae_serializer_serialize_int(s, nused, _state);
30712         for(i=0; i<=a->tablesize-1; i++)
30713         {
30714             if( a->idx.ptr.p_int[2*i+0]>=0 )
30715             {
30716                 ae_serializer_serialize_int(s, a->idx.ptr.p_int[2*i+0], _state);
30717                 ae_serializer_serialize_int(s, a->idx.ptr.p_int[2*i+1], _state);
30718                 ae_serializer_serialize_double(s, a->vals.ptr.p_double[i], _state);
30719             }
30720         }
30721     }
30722     if( a->matrixtype==1 )
30723     {
30724 
30725         /*
30726          * Serialize CRS
30727          */
30728         ae_serializer_serialize_int(s, a->m, _state);
30729         ae_serializer_serialize_int(s, a->n, _state);
30730         ae_serializer_serialize_int(s, a->ninitialized, _state);
30731         serializeintegerarray(s, &a->ridx, a->m+1, _state);
30732         serializeintegerarray(s, &a->idx, a->ridx.ptr.p_int[a->m], _state);
30733         serializerealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30734     }
30735     if( a->matrixtype==2 )
30736     {
30737 
30738         /*
30739          * Serialize SKS
30740          */
30741         ae_assert(a->m==a->n, "SparseSerialize: rectangular SKS serialization is not supported", _state);
30742         ae_serializer_serialize_int(s, a->m, _state);
30743         ae_serializer_serialize_int(s, a->n, _state);
30744         serializeintegerarray(s, &a->ridx, a->m+1, _state);
30745         serializeintegerarray(s, &a->didx, a->n+1, _state);
30746         serializeintegerarray(s, &a->uidx, a->n+1, _state);
30747         serializerealarray(s, &a->vals, a->ridx.ptr.p_int[a->m], _state);
30748     }
30749 
30750     /*
30751      * End of stream
30752      */
30753     ae_serializer_serialize_int(s, 117, _state);
30754 }
30755 
30756 
30757 /*************************************************************************
30758 Serializer: unserialization
30759 
30760   -- ALGLIB --
30761      Copyright 20.07.2021 by Bochkanov Sergey
30762 *************************************************************************/
sparseunserialize(ae_serializer * s,sparsematrix * a,ae_state * _state)30763 void sparseunserialize(ae_serializer* s,
30764      sparsematrix* a,
30765      ae_state *_state)
30766 {
30767     ae_int_t i;
30768     ae_int_t i0;
30769     ae_int_t i1;
30770     ae_int_t m;
30771     ae_int_t n;
30772     ae_int_t nused;
30773     ae_int_t k;
30774     double v;
30775 
30776     _sparsematrix_clear(a);
30777 
30778 
30779     /*
30780      * Check stream header: scode, matrix type, version type
30781      */
30782     ae_serializer_unserialize_int(s, &k, _state);
30783     ae_assert(k==getsparsematrixserializationcode(_state), "SparseUnserialize: stream header corrupted", _state);
30784     ae_serializer_unserialize_int(s, &a->matrixtype, _state);
30785     ae_assert((a->matrixtype==0||a->matrixtype==1)||a->matrixtype==2, "SparseUnserialize: unexpected matrix type", _state);
30786     ae_serializer_unserialize_int(s, &k, _state);
30787     ae_assert(k==0, "SparseUnserialize: stream header corrupted", _state);
30788 
30789     /*
30790      * Unserialize other parameters
30791      */
30792     if( a->matrixtype==0 )
30793     {
30794 
30795         /*
30796          * Unerialize Hash
30797          */
30798         ae_serializer_unserialize_int(s, &m, _state);
30799         ae_serializer_unserialize_int(s, &n, _state);
30800         ae_serializer_unserialize_int(s, &nused, _state);
30801         sparsecreate(m, n, nused, a, _state);
30802         for(i=0; i<=nused-1; i++)
30803         {
30804             ae_serializer_unserialize_int(s, &i0, _state);
30805             ae_serializer_unserialize_int(s, &i1, _state);
30806             ae_serializer_unserialize_double(s, &v, _state);
30807             sparseset(a, i0, i1, v, _state);
30808         }
30809     }
30810     if( a->matrixtype==1 )
30811     {
30812 
30813         /*
30814          * Unserialize CRS
30815          */
30816         ae_serializer_unserialize_int(s, &a->m, _state);
30817         ae_serializer_unserialize_int(s, &a->n, _state);
30818         ae_serializer_unserialize_int(s, &a->ninitialized, _state);
30819         unserializeintegerarray(s, &a->ridx, _state);
30820         unserializeintegerarray(s, &a->idx, _state);
30821         unserializerealarray(s, &a->vals, _state);
30822         sparseinitduidx(a, _state);
30823     }
30824     if( a->matrixtype==2 )
30825     {
30826 
30827         /*
30828          * Unserialize SKS
30829          */
30830         ae_serializer_unserialize_int(s, &a->m, _state);
30831         ae_serializer_unserialize_int(s, &a->n, _state);
30832         ae_assert(a->m==a->n, "SparseUnserialize: rectangular SKS unserialization is not supported", _state);
30833         unserializeintegerarray(s, &a->ridx, _state);
30834         unserializeintegerarray(s, &a->didx, _state);
30835         unserializeintegerarray(s, &a->uidx, _state);
30836         unserializerealarray(s, &a->vals, _state);
30837     }
30838 
30839     /*
30840      * End of stream
30841      */
30842     ae_serializer_unserialize_int(s, &k, _state);
30843     ae_assert(k==117, "SparseMatrixUnserialize: end-of-stream marker not found", _state);
30844 }
30845 
30846 
30847 /*************************************************************************
30848 This is hash function.
30849 
30850   -- ALGLIB PROJECT --
30851      Copyright 14.10.2011 by Bochkanov Sergey
30852 *************************************************************************/
sparse_hash(ae_int_t i,ae_int_t j,ae_int_t tabsize,ae_state * _state)30853 static ae_int_t sparse_hash(ae_int_t i,
30854      ae_int_t j,
30855      ae_int_t tabsize,
30856      ae_state *_state)
30857 {
30858     ae_frame _frame_block;
30859     hqrndstate r;
30860     ae_int_t result;
30861 
30862     ae_frame_make(_state, &_frame_block);
30863     memset(&r, 0, sizeof(r));
30864     _hqrndstate_init(&r, _state, ae_true);
30865 
30866     hqrndseed(i, j, &r, _state);
30867     result = hqrnduniformi(&r, tabsize, _state);
30868     ae_frame_leave(_state);
30869     return result;
30870 }
30871 
30872 
_sparsematrix_init(void * _p,ae_state * _state,ae_bool make_automatic)30873 void _sparsematrix_init(void* _p, ae_state *_state, ae_bool make_automatic)
30874 {
30875     sparsematrix *p = (sparsematrix*)_p;
30876     ae_touch_ptr((void*)p);
30877     ae_vector_init(&p->vals, 0, DT_REAL, _state, make_automatic);
30878     ae_vector_init(&p->idx, 0, DT_INT, _state, make_automatic);
30879     ae_vector_init(&p->ridx, 0, DT_INT, _state, make_automatic);
30880     ae_vector_init(&p->didx, 0, DT_INT, _state, make_automatic);
30881     ae_vector_init(&p->uidx, 0, DT_INT, _state, make_automatic);
30882 }
30883 
30884 
_sparsematrix_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)30885 void _sparsematrix_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
30886 {
30887     sparsematrix *dst = (sparsematrix*)_dst;
30888     sparsematrix *src = (sparsematrix*)_src;
30889     ae_vector_init_copy(&dst->vals, &src->vals, _state, make_automatic);
30890     ae_vector_init_copy(&dst->idx, &src->idx, _state, make_automatic);
30891     ae_vector_init_copy(&dst->ridx, &src->ridx, _state, make_automatic);
30892     ae_vector_init_copy(&dst->didx, &src->didx, _state, make_automatic);
30893     ae_vector_init_copy(&dst->uidx, &src->uidx, _state, make_automatic);
30894     dst->matrixtype = src->matrixtype;
30895     dst->m = src->m;
30896     dst->n = src->n;
30897     dst->nfree = src->nfree;
30898     dst->ninitialized = src->ninitialized;
30899     dst->tablesize = src->tablesize;
30900 }
30901 
30902 
_sparsematrix_clear(void * _p)30903 void _sparsematrix_clear(void* _p)
30904 {
30905     sparsematrix *p = (sparsematrix*)_p;
30906     ae_touch_ptr((void*)p);
30907     ae_vector_clear(&p->vals);
30908     ae_vector_clear(&p->idx);
30909     ae_vector_clear(&p->ridx);
30910     ae_vector_clear(&p->didx);
30911     ae_vector_clear(&p->uidx);
30912 }
30913 
30914 
_sparsematrix_destroy(void * _p)30915 void _sparsematrix_destroy(void* _p)
30916 {
30917     sparsematrix *p = (sparsematrix*)_p;
30918     ae_touch_ptr((void*)p);
30919     ae_vector_destroy(&p->vals);
30920     ae_vector_destroy(&p->idx);
30921     ae_vector_destroy(&p->ridx);
30922     ae_vector_destroy(&p->didx);
30923     ae_vector_destroy(&p->uidx);
30924 }
30925 
30926 
_sparsebuffers_init(void * _p,ae_state * _state,ae_bool make_automatic)30927 void _sparsebuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
30928 {
30929     sparsebuffers *p = (sparsebuffers*)_p;
30930     ae_touch_ptr((void*)p);
30931     ae_vector_init(&p->d, 0, DT_INT, _state, make_automatic);
30932     ae_vector_init(&p->u, 0, DT_INT, _state, make_automatic);
30933     _sparsematrix_init(&p->s, _state, make_automatic);
30934 }
30935 
30936 
_sparsebuffers_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)30937 void _sparsebuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
30938 {
30939     sparsebuffers *dst = (sparsebuffers*)_dst;
30940     sparsebuffers *src = (sparsebuffers*)_src;
30941     ae_vector_init_copy(&dst->d, &src->d, _state, make_automatic);
30942     ae_vector_init_copy(&dst->u, &src->u, _state, make_automatic);
30943     _sparsematrix_init_copy(&dst->s, &src->s, _state, make_automatic);
30944 }
30945 
30946 
_sparsebuffers_clear(void * _p)30947 void _sparsebuffers_clear(void* _p)
30948 {
30949     sparsebuffers *p = (sparsebuffers*)_p;
30950     ae_touch_ptr((void*)p);
30951     ae_vector_clear(&p->d);
30952     ae_vector_clear(&p->u);
30953     _sparsematrix_clear(&p->s);
30954 }
30955 
30956 
_sparsebuffers_destroy(void * _p)30957 void _sparsebuffers_destroy(void* _p)
30958 {
30959     sparsebuffers *p = (sparsebuffers*)_p;
30960     ae_touch_ptr((void*)p);
30961     ae_vector_destroy(&p->d);
30962     ae_vector_destroy(&p->u);
30963     _sparsematrix_destroy(&p->s);
30964 }
30965 
30966 
30967 #endif
30968 #if defined(AE_COMPILE_HSSCHUR) || !defined(AE_PARTIAL_BUILD)
30969 
30970 
/*************************************************************************
0-based wrapper around InternalSchurDecomposition().

It first tries an MKL-based implementation; if that declines, it copies
the inputs into 1-based temporaries (the ALGLIB core inherits 1-based
indexing from its LAPACK origin), runs the generic code, and copies the
results back into the caller's 0-based arrays.

Parameters mirror InternalSchurDecomposition():
    H       -   N*N matrix (0-based); overwritten by T when TNeeded!=0
    TNeeded -   whether the quasi-triangular factor T is wanted (0/1)
    ZNeeded -   0: no Schur vectors; 1: accumulate into caller's Z;
                2: Z is (re)allocated and formed from scratch
    WR, WI  -   re-allocated here; receive eigenvalue real/imag parts
    Info    -   completion code produced by the internal solver
*************************************************************************/
void rmatrixinternalschurdecomposition(/* Real    */ ae_matrix* h,
     ae_int_t n,
     ae_int_t tneeded,
     ae_int_t zneeded,
     /* Real    */ ae_vector* wr,
     /* Real    */ ae_vector* wi,
     /* Real    */ ae_matrix* z,
     ae_int_t* info,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_matrix h1;
    ae_matrix z1;
    ae_vector wr1;
    ae_vector wi1;

    /* ALGLIB boilerplate: register temporaries in the dynamic frame so
       they are released by ae_frame_leave() on every exit path */
    ae_frame_make(_state, &_frame_block);
    memset(&h1, 0, sizeof(h1));
    memset(&z1, 0, sizeof(z1));
    memset(&wr1, 0, sizeof(wr1));
    memset(&wi1, 0, sizeof(wi1));
    ae_vector_clear(wr);
    ae_vector_clear(wi);
    *info = 0;
    ae_matrix_init(&h1, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z1, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&wr1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&wi1, 0, DT_REAL, _state, ae_true);


    /*
     * Allocate space
     */
    ae_vector_set_length(wr, n, _state);
    ae_vector_set_length(wi, n, _state);
    if( zneeded==2 )
    {
        rmatrixsetlengthatleast(z, n, n, _state);
    }

    /*
     * MKL version (returns ae_true when it handled the problem)
     */
    if( rmatrixinternalschurdecompositionmkl(h, n, tneeded, zneeded, wr, wi, z, info, _state) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * ALGLIB version: shift everything to 1-based temporaries
     */
    ae_matrix_set_length(&h1, n+1, n+1, _state);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            h1.ptr.pp_double[1+i][1+j] = h->ptr.pp_double[i][j];
        }
    }
    if( zneeded==1 )
    {
        /* caller-supplied Z must be carried through the iteration */
        ae_matrix_set_length(&z1, n+1, n+1, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z1.ptr.pp_double[1+i][1+j] = z->ptr.pp_double[i][j];
            }
        }
    }
    internalschurdecomposition(&h1, n, tneeded, zneeded, &wr1, &wi1, &z1, info, _state);
    /* copy results back to 0-based output arrays */
    for(i=0; i<=n-1; i++)
    {
        wr->ptr.p_double[i] = wr1.ptr.p_double[i+1];
        wi->ptr.p_double[i] = wi1.ptr.p_double[i+1];
    }
    if( tneeded!=0 )
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                h->ptr.pp_double[i][j] = h1.ptr.pp_double[1+i][1+j];
            }
        }
    }
    if( zneeded!=0 )
    {
        rmatrixsetlengthatleast(z, n, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_double[i][j] = z1.ptr.pp_double[1+i][1+j];
            }
        }
    }
    ae_frame_leave(_state);
}
31073 
31074 
31075 /*************************************************************************
31076 Subroutine performing  the  Schur  decomposition  of  a  matrix  in  upper
31077 Hessenberg form using the QR algorithm with multiple shifts.
31078 
31079 The  source matrix  H  is  represented as  S'*H*S = T, where H - matrix in
31080 upper Hessenberg form,  S - orthogonal matrix (Schur vectors),   T - upper
31081 quasi-triangular matrix (with blocks of sizes  1x1  and  2x2  on  the main
31082 diagonal).
31083 
31084 Input parameters:
31085     H   -   matrix to be decomposed.
31086             Array whose indexes range within [1..N, 1..N].
31087     N   -   size of H, N>=0.
31088 
31089 
31090 Output parameters:
31091     H   -   contains the matrix T.
31092             Array whose indexes range within [1..N, 1..N].
31093             All elements below the blocks on the main diagonal are equal
31094             to 0.
31095     S   -   contains Schur vectors.
31096             Array whose indexes range within [1..N, 1..N].
31097 
31098 Note 1:
31099     The block structure of matrix T could be easily recognized: since  all
31100     the elements  below  the blocks are zeros, the elements a[i+1,i] which
31101     are equal to 0 show the block border.
31102 
31103 Note 2:
31104     the algorithm  performance  depends  on  the  value  of  the  internal
31105     parameter NS of InternalSchurDecomposition  subroutine  which  defines
31106     the number of shifts in the QR algorithm (analog of  the  block  width
31107     in block matrix algorithms in linear algebra). If you require  maximum
31108     performance  on  your  machine,  it  is  recommended  to  adjust  this
31109     parameter manually.
31110 
31111 Result:
31112     True, if the algorithm has converged and the parameters H and S contain
31113         the result.
31114     False, if the algorithm has not converged.
31115 
31116 Algorithm implemented on the basis of subroutine DHSEQR (LAPACK 3.0 library).
31117 *************************************************************************/
upperhessenbergschurdecomposition(ae_matrix * h,ae_int_t n,ae_matrix * s,ae_state * _state)31118 ae_bool upperhessenbergschurdecomposition(/* Real    */ ae_matrix* h,
31119      ae_int_t n,
31120      /* Real    */ ae_matrix* s,
31121      ae_state *_state)
31122 {
31123     ae_frame _frame_block;
31124     ae_vector wi;
31125     ae_vector wr;
31126     ae_int_t info;
31127     ae_bool result;
31128 
31129     ae_frame_make(_state, &_frame_block);
31130     memset(&wi, 0, sizeof(wi));
31131     memset(&wr, 0, sizeof(wr));
31132     ae_matrix_clear(s);
31133     ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
31134     ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
31135 
31136     internalschurdecomposition(h, n, 1, 2, &wr, &wi, s, &info, _state);
31137     result = info==0;
31138     ae_frame_leave(_state);
31139     return result;
31140 }
31141 
31142 
/*************************************************************************
Multiple-shift QR driver: Schur decomposition of an upper Hessenberg
matrix H stored with 1-based indexing ([1..N,1..N]).

    TNeeded -   1 if the quasi-triangular factor T must be returned in H
    ZNeeded -   0: Schur vectors not computed
                1: transformations accumulated into caller-supplied Z
                2: Z is initialized to identity here, then accumulated
    WR, WI  -   re-allocated here (1-based); receive eigenvalue
                real/imaginary parts
    Info    -   0 on success; on failure to converge it is set to the
                boundary index I of the unconverged submatrix

Small problems (NS<=2, NS>N, or MAXB>=N) are delegated entirely to the
double-shift code (hsschur_internalauxschur); otherwise multiple-shift
iterations split off trailing submatrices of order <= MAXB which are
then finished by the double-shift code.
*************************************************************************/
void internalschurdecomposition(/* Real    */ ae_matrix* h,
     ae_int_t n,
     ae_int_t tneeded,
     ae_int_t zneeded,
     /* Real    */ ae_vector* wr,
     /* Real    */ ae_vector* wi,
     /* Real    */ ae_matrix* z,
     ae_int_t* info,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_int_t i;
    ae_int_t i1;
    ae_int_t i2;
    ae_int_t ierr;
    ae_int_t ii;
    ae_int_t itemp;
    ae_int_t itn;
    ae_int_t its;
    ae_int_t j;
    ae_int_t k;
    ae_int_t l;
    ae_int_t maxb;
    ae_int_t nr;
    ae_int_t ns;
    ae_int_t nv;
    double absw;
    double smlnum;
    double tau;
    double temp;
    double tst1;
    double ulp;
    double unfl;
    ae_matrix s;
    ae_vector v;
    ae_vector vv;
    ae_vector workc1;
    ae_vector works1;
    ae_vector workv3;
    ae_vector tmpwr;
    ae_vector tmpwi;
    ae_bool initz;
    ae_bool wantt;
    ae_bool wantz;
    double cnst;
    ae_bool failflag;
    ae_int_t p1;
    ae_int_t p2;
    double vt;

    /* ALGLIB boilerplate: temporaries are registered in the dynamic frame
       so that every early return through ae_frame_leave() frees them */
    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&s, 0, sizeof(s));
    memset(&v, 0, sizeof(v));
    memset(&vv, 0, sizeof(vv));
    memset(&workc1, 0, sizeof(workc1));
    memset(&works1, 0, sizeof(works1));
    memset(&workv3, 0, sizeof(workv3));
    memset(&tmpwr, 0, sizeof(tmpwr));
    memset(&tmpwi, 0, sizeof(tmpwi));
    ae_vector_clear(wr);
    ae_vector_clear(wi);
    *info = 0;
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&vv, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&workc1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&works1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&workv3, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmpwr, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmpwi, 0, DT_REAL, _state, ae_true);


    /*
     * Set the order of the multi-shift QR algorithm to be used.
     * If you want to tune the algorithm, change these values
     */
    ns = 12;
    maxb = 50;

    /*
     * Now 2 < NS <= MAXB < NH.
     */
    maxb = ae_maxint(3, maxb, _state);
    ns = ae_minint(maxb, ns, _state);

    /*
     * Initialize (all scratch arrays are 1-based, hence the "+1" slack)
     */
    cnst = 1.5;
    ae_vector_set_length(&work, ae_maxint(n, 1, _state)+1, _state);
    ae_matrix_set_length(&s, ns+1, ns+1, _state);
    ae_vector_set_length(&v, ns+1+1, _state);
    ae_vector_set_length(&vv, ns+1+1, _state);
    ae_vector_set_length(wr, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(wi, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&workc1, 1+1, _state);
    ae_vector_set_length(&works1, 1+1, _state);
    ae_vector_set_length(&workv3, 3+1, _state);
    ae_vector_set_length(&tmpwr, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&tmpwi, ae_maxint(n, 1, _state)+1, _state);
    ae_assert(n>=0, "InternalSchurDecomposition: incorrect N!", _state);
    ae_assert(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!", _state);
    ae_assert((zneeded==0||zneeded==1)||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!", _state);
    wantt = tneeded==1;
    initz = zneeded==2;
    wantz = zneeded!=0;
    *info = 0;

    /*
     * Initialize Z to the identity matrix, if necessary
     */
    if( initz )
    {
        rmatrixsetlengthatleast(z, n+1, n+1, _state);
        for(i=1; i<=n; i++)
        {
            for(j=1; j<=n; j++)
            {
                if( i==j )
                {
                    z->ptr.pp_double[i][j] = (double)(1);
                }
                else
                {
                    z->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
    }

    /*
     * Quick return if possible
     */
    if( n==0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        wr->ptr.p_double[1] = h->ptr.pp_double[1][1];
        wi->ptr.p_double[1] = (double)(0);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Set rows and columns 1 to N to zero below the first
     * subdiagonal.
     */
    for(j=1; j<=n-2; j++)
    {
        for(i=j+2; i<=n; i++)
        {
            h->ptr.pp_double[i][j] = (double)(0);
        }
    }

    /*
     * Test if N is sufficiently small
     */
    if( (ns<=2||ns>n)||maxb>=n )
    {

        /*
         * Use the standard double-shift algorithm
         */
        hsschur_internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);

        /*
         * fill entries under diagonal blocks of T with zeros
         * (real eigenvalue => 1x1 block, complex pair => 2x2 block)
         */
        if( wantt )
        {
            j = 1;
            while(j<=n)
            {
                if( ae_fp_eq(wi->ptr.p_double[j],(double)(0)) )
                {
                    for(i=j+1; i<=n; i++)
                    {
                        h->ptr.pp_double[i][j] = (double)(0);
                    }
                    j = j+1;
                }
                else
                {
                    for(i=j+2; i<=n; i++)
                    {
                        h->ptr.pp_double[i][j] = (double)(0);
                        h->ptr.pp_double[i][j+1] = (double)(0);
                    }
                    j = j+2;
                }
            }
        }
        ae_frame_leave(_state);
        return;
    }

    /* machine-dependent deflation threshold */
    unfl = ae_minrealnumber;
    ulp = 2*ae_machineepsilon;
    smlnum = unfl*(n/ulp);

    /*
     * I1 and I2 are the indices of the first row and last column of H
     * to which transformations must be applied. If eigenvalues only are
     * being computed, I1 and I2 are set inside the main loop.
     */
    i1 = 1;
    i2 = n;

    /*
     * ITN is the total number of multiple-shift QR iterations allowed.
     */
    itn = 30*n;

    /*
     * The main loop begins here. I is the loop index and decreases from
     * IHI to ILO in steps of at most MAXB. Each iteration of the loop
     * works with the active submatrix in rows and columns L to I.
     * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
     * H(L,L-1) is negligible so that the matrix splits.
     */
    i = n;
    for(;;)
    {
        l = 1;
        if( i<1 )
        {

            /*
             * fill entries under diagonal blocks of T with zeros
             */
            if( wantt )
            {
                j = 1;
                while(j<=n)
                {
                    if( ae_fp_eq(wi->ptr.p_double[j],(double)(0)) )
                    {
                        for(i=j+1; i<=n; i++)
                        {
                            h->ptr.pp_double[i][j] = (double)(0);
                        }
                        j = j+1;
                    }
                    else
                    {
                        for(i=j+2; i<=n; i++)
                        {
                            h->ptr.pp_double[i][j] = (double)(0);
                            h->ptr.pp_double[i][j+1] = (double)(0);
                        }
                        j = j+2;
                    }
                }
            }

            /*
             * Exit
             */
            ae_frame_leave(_state);
            return;
        }

        /*
         * Perform multiple-shift QR iterations on rows and columns ILO to I
         * until a submatrix of order at most MAXB splits off at the bottom
         * because a subdiagonal element has become negligible.
         */
        failflag = ae_true;
        for(its=0; its<=itn; its++)
        {

            /*
             * Look for a single small subdiagonal element.
             */
            for(k=i; k>=l+1; k--)
            {
                tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
                if( ae_fp_eq(tst1,(double)(0)) )
                {
                    tst1 = upperhessenberg1norm(h, l, i, l, i, &work, _state);
                }
                if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) )
                {
                    break;
                }
            }
            l = k;
            if( l>1 )
            {

                /*
                 * H(L,L-1) is negligible.
                 */
                h->ptr.pp_double[l][l-1] = (double)(0);
            }

            /*
             * Exit from loop if a submatrix of order <= MAXB has split off.
             */
            if( l>=i-maxb+1 )
            {
                failflag = ae_false;
                break;
            }

            /*
             * Now the active submatrix is in rows and columns L to I. If
             * eigenvalues only are being computed, only the active submatrix
             * need be transformed.
             */
            if( its==20||its==30 )
            {

                /*
                 * Exceptional shifts (used to break out of convergence stagnation).
                 */
                for(ii=i-ns+1; ii<=i; ii++)
                {
                    wr->ptr.p_double[ii] = cnst*(ae_fabs(h->ptr.pp_double[ii][ii-1], _state)+ae_fabs(h->ptr.pp_double[ii][ii], _state));
                    wi->ptr.p_double[ii] = (double)(0);
                }
            }
            else
            {

                /*
                 * Use eigenvalues of trailing submatrix of order NS as shifts.
                 */
                copymatrix(h, i-ns+1, i, i-ns+1, i, &s, 1, ns, 1, ns, _state);
                hsschur_internalauxschur(ae_false, ae_false, ns, 1, ns, &s, &tmpwr, &tmpwi, 1, ns, z, &work, &workv3, &workc1, &works1, &ierr, _state);
                for(p1=1; p1<=ns; p1++)
                {
                    wr->ptr.p_double[i-ns+p1] = tmpwr.ptr.p_double[p1];
                    wi->ptr.p_double[i-ns+p1] = tmpwi.ptr.p_double[p1];
                }
                if( ierr>0 )
                {

                    /*
                     * If DLAHQR failed to compute all NS eigenvalues, use the
                     * unconverged diagonal elements as the remaining shifts.
                     */
                    for(ii=1; ii<=ierr; ii++)
                    {
                        wr->ptr.p_double[i-ns+ii] = s.ptr.pp_double[ii][ii];
                        wi->ptr.p_double[i-ns+ii] = (double)(0);
                    }
                }
            }

            /*
             * Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
             * where G is the Hessenberg submatrix H(L:I,L:I) and w is
             * the vector of shifts (stored in WR and WI). The result is
             * stored in the local array V.
             */
            v.ptr.p_double[1] = (double)(1);
            for(ii=2; ii<=ns+1; ii++)
            {
                v.ptr.p_double[ii] = (double)(0);
            }
            nv = 1;
            for(j=i-ns+1; j<=i; j++)
            {
                /* a complex pair is processed once, at its positive-Wi member */
                if( ae_fp_greater_eq(wi->ptr.p_double[j],(double)(0)) )
                {
                    if( ae_fp_eq(wi->ptr.p_double[j],(double)(0)) )
                    {

                        /*
                         * real shift
                         */
                        p1 = nv+1;
                        ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
                        matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &vv, 1, nv, 1.0, &v, 1, nv+1, -wr->ptr.p_double[j], _state);
                        nv = nv+1;
                    }
                    else
                    {
                        if( ae_fp_greater(wi->ptr.p_double[j],(double)(0)) )
                        {

                            /*
                             * complex conjugate pair of shifts
                             */
                            p1 = nv+1;
                            ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
                            matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &v, 1, nv, 1.0, &vv, 1, nv+1, -2*wr->ptr.p_double[j], _state);
                            itemp = vectoridxabsmax(&vv, 1, nv+1, _state);
                            temp = 1/ae_maxreal(ae_fabs(vv.ptr.p_double[itemp], _state), smlnum, _state);
                            p1 = nv+1;
                            ae_v_muld(&vv.ptr.p_double[1], 1, ae_v_len(1,p1), temp);
                            absw = pythag2(wr->ptr.p_double[j], wi->ptr.p_double[j], _state);
                            temp = temp*absw*absw;
                            matrixvectormultiply(h, l, l+nv+1, l, l+nv, ae_false, &vv, 1, nv+1, 1.0, &v, 1, nv+2, temp, _state);
                            nv = nv+2;
                        }
                    }

                    /*
                     * Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
                     * reset it to the unit vector.
                     */
                    itemp = vectoridxabsmax(&v, 1, nv, _state);
                    temp = ae_fabs(v.ptr.p_double[itemp], _state);
                    if( ae_fp_eq(temp,(double)(0)) )
                    {
                        v.ptr.p_double[1] = (double)(1);
                        for(ii=2; ii<=nv; ii++)
                        {
                            v.ptr.p_double[ii] = (double)(0);
                        }
                    }
                    else
                    {
                        temp = ae_maxreal(temp, smlnum, _state);
                        vt = 1/temp;
                        ae_v_muld(&v.ptr.p_double[1], 1, ae_v_len(1,nv), vt);
                    }
                }
            }

            /*
             * Multiple-shift QR step
             */
            for(k=l; k<=i-1; k++)
            {

                /*
                 * The first iteration of this loop determines a reflection G
                 * from the vector V and applies it from left and right to H,
                 * thus creating a nonzero bulge below the subdiagonal.
                 *
                 * Each subsequent iteration determines a reflection G to
                 * restore the Hessenberg form in the (K-1)th column, and thus
                 * chases the bulge one step toward the bottom of the active
                 * submatrix. NR is the order of G.
                 */
                nr = ae_minint(ns+1, i-k+1, _state);
                if( k>l )
                {
                    p1 = k-1;
                    p2 = k+nr-1;
                    ae_v_move(&v.ptr.p_double[1], 1, &h->ptr.pp_double[k][p1], h->stride, ae_v_len(1,nr));
                    touchint(&p2, _state);
                }
                generatereflection(&v, nr, &tau, _state);
                if( k>l )
                {
                    h->ptr.pp_double[k][k-1] = v.ptr.p_double[1];
                    for(ii=k+1; ii<=i; ii++)
                    {
                        h->ptr.pp_double[ii][k-1] = (double)(0);
                    }
                }
                v.ptr.p_double[1] = (double)(1);

                /*
                 * Apply G from the left to transform the rows of the matrix in
                 * columns K to I2.
                 */
                applyreflectionfromtheleft(h, tau, &v, k, k+nr-1, k, i2, &work, _state);

                /*
                 * Apply G from the right to transform the columns of the
                 * matrix in rows I1 to min(K+NR,I).
                 */
                applyreflectionfromtheright(h, tau, &v, i1, ae_minint(k+nr, i, _state), k, k+nr-1, &work, _state);
                if( wantz )
                {

                    /*
                     * Accumulate transformations in the matrix Z
                     */
                    applyreflectionfromtheright(z, tau, &v, 1, n, k, k+nr-1, &work, _state);
                }
            }
        }

        /*
         * Failure to converge in remaining number of iterations
         */
        if( failflag )
        {
            *info = i;
            ae_frame_leave(_state);
            return;
        }

        /*
         * A submatrix of order <= MAXB in rows and columns L to I has split
         * off. Use the double-shift QR algorithm to handle it.
         */
        hsschur_internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
        if( *info>0 )
        {
            ae_frame_leave(_state);
            return;
        }

        /*
         * Decrement number of remaining iterations, and return to start of
         * the main loop with a new value of I.
         */
        itn = itn-its;
        i = l-1;

        /*
         * Block below is never executed; it is necessary just to avoid
         * "unreachable code" warning about automatically generated code.
         *
         * We just need a way to transfer control to the end of the function,
         * even a fake way which is never actually traversed.
         */
        if( alwaysfalse(_state) )
        {
            ae_assert(ae_false, "Assertion failed", _state);
            break;
        }
    }
    ae_frame_leave(_state);
}
31671 
31672 
31673 /*************************************************************************
31674 Translation of DLAHQR from LAPACK.
31675 *************************************************************************/
hsschur_internalauxschur(ae_bool wantt,ae_bool wantz,ae_int_t n,ae_int_t ilo,ae_int_t ihi,ae_matrix * h,ae_vector * wr,ae_vector * wi,ae_int_t iloz,ae_int_t ihiz,ae_matrix * z,ae_vector * work,ae_vector * workv3,ae_vector * workc1,ae_vector * works1,ae_int_t * info,ae_state * _state)31676 static void hsschur_internalauxschur(ae_bool wantt,
31677      ae_bool wantz,
31678      ae_int_t n,
31679      ae_int_t ilo,
31680      ae_int_t ihi,
31681      /* Real    */ ae_matrix* h,
31682      /* Real    */ ae_vector* wr,
31683      /* Real    */ ae_vector* wi,
31684      ae_int_t iloz,
31685      ae_int_t ihiz,
31686      /* Real    */ ae_matrix* z,
31687      /* Real    */ ae_vector* work,
31688      /* Real    */ ae_vector* workv3,
31689      /* Real    */ ae_vector* workc1,
31690      /* Real    */ ae_vector* works1,
31691      ae_int_t* info,
31692      ae_state *_state)
31693 {
31694     double safmin;
31695     double tst;
31696     double ab;
31697     double ba;
31698     double aa;
31699     double bb;
31700     double rt1r;
31701     double rt1i;
31702     double rt2r;
31703     double rt2i;
31704     double tr;
31705     double det;
31706     double rtdisc;
31707     double h21s;
31708     ae_int_t i;
31709     ae_int_t i1;
31710     ae_int_t i2;
31711     ae_int_t itmax;
31712     ae_int_t its;
31713     ae_int_t j;
31714     ae_int_t k;
31715     ae_int_t l;
31716     ae_int_t m;
31717     ae_int_t nh;
31718     ae_int_t nr;
31719     ae_int_t nz;
31720     double cs;
31721     double h11;
31722     double h12;
31723     double h21;
31724     double h22;
31725     double s;
31726     double smlnum;
31727     double sn;
31728     double sum;
31729     double t1;
31730     double t2;
31731     double t3;
31732     double v2;
31733     double v3;
31734     ae_bool failflag;
31735     double dat1;
31736     double dat2;
31737     ae_int_t p1;
31738     double him1im1;
31739     double him1i;
31740     double hiim1;
31741     double hii;
31742     double wrim1;
31743     double wri;
31744     double wiim1;
31745     double wii;
31746     double ulp;
31747 
31748     *info = 0;
31749 
31750     *info = 0;
31751     dat1 = 0.75;
31752     dat2 = -0.4375;
31753 
31754     /*
31755      * Quick return if possible
31756      */
31757     if( n==0 )
31758     {
31759         return;
31760     }
31761     if( ilo==ihi )
31762     {
31763         wr->ptr.p_double[ilo] = h->ptr.pp_double[ilo][ilo];
31764         wi->ptr.p_double[ilo] = (double)(0);
31765         return;
31766     }
31767 
31768     /*
31769      * ==== clear out the trash ====
31770      */
31771     for(j=ilo; j<=ihi-3; j++)
31772     {
31773         h->ptr.pp_double[j+2][j] = (double)(0);
31774         h->ptr.pp_double[j+3][j] = (double)(0);
31775     }
31776     if( ilo<=ihi-2 )
31777     {
31778         h->ptr.pp_double[ihi][ihi-2] = (double)(0);
31779     }
31780     nh = ihi-ilo+1;
31781     nz = ihiz-iloz+1;
31782 
31783     /*
31784      * Set machine-dependent constants for the stopping criterion.
31785      */
31786     safmin = ae_minrealnumber;
31787     ulp = ae_machineepsilon;
31788     smlnum = safmin*(nh/ulp);
31789 
31790     /*
31791      * I1 and I2 are the indices of the first row and last column of H
31792      * to which transformations must be applied. If eigenvalues only are
31793      * being computed, I1 and I2 are set inside the main loop.
31794      *
31795      * Setting them to large negative value helps to debug possible errors
31796      * due to uninitialized variables; also it helps to avoid compiler
31797      * warnings.
31798      */
31799     i1 = -99999;
31800     i2 = -99999;
31801     if( wantt )
31802     {
31803         i1 = 1;
31804         i2 = n;
31805     }
31806 
31807     /*
31808      * ITMAX is the total number of QR iterations allowed.
31809      */
31810     itmax = 30*ae_maxint(10, nh, _state);
31811 
31812     /*
31813      * The main loop begins here. I is the loop index and decreases from
31814      * IHI to ILO in steps of 1 or 2. Each iteration of the loop works
31815      * with the active submatrix in rows and columns L to I.
31816      * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
31817      * H(L,L-1) is negligible so that the matrix splits.
31818      */
31819     i = ihi;
31820     for(;;)
31821     {
31822         l = ilo;
31823         if( i<ilo )
31824         {
31825             return;
31826         }
31827 
31828         /*
31829          * Perform QR iterations on rows and columns ILO to I until a
31830          * submatrix of order 1 or 2 splits off at the bottom because a
31831          * subdiagonal element has become negligible.
31832          */
31833         failflag = ae_true;
31834         for(its=0; its<=itmax; its++)
31835         {
31836 
31837             /*
31838              * Look for a single small subdiagonal element.
31839              */
31840             for(k=i; k>=l+1; k--)
31841             {
31842                 if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),smlnum) )
31843                 {
31844                     break;
31845                 }
31846                 tst = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
31847                 if( ae_fp_eq(tst,(double)(0)) )
31848                 {
31849                     if( k-2>=ilo )
31850                     {
31851                         tst = tst+ae_fabs(h->ptr.pp_double[k-1][k-2], _state);
31852                     }
31853                     if( k+1<=ihi )
31854                     {
31855                         tst = tst+ae_fabs(h->ptr.pp_double[k+1][k], _state);
31856                     }
31857                 }
31858 
31859                 /*
31860                  * ==== The following is a conservative small subdiagonal
31861                  * .    deflation  criterion due to Ahues & Tisseur (LAWN 122,
31862                  * .    1997). It has better mathematical foundation and
31863                  * .    improves accuracy in some cases.  ====
31864                  */
31865                 if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ulp*tst) )
31866                 {
31867                     ab = ae_maxreal(ae_fabs(h->ptr.pp_double[k][k-1], _state), ae_fabs(h->ptr.pp_double[k-1][k], _state), _state);
31868                     ba = ae_minreal(ae_fabs(h->ptr.pp_double[k][k-1], _state), ae_fabs(h->ptr.pp_double[k-1][k], _state), _state);
31869                     aa = ae_maxreal(ae_fabs(h->ptr.pp_double[k][k], _state), ae_fabs(h->ptr.pp_double[k-1][k-1]-h->ptr.pp_double[k][k], _state), _state);
31870                     bb = ae_minreal(ae_fabs(h->ptr.pp_double[k][k], _state), ae_fabs(h->ptr.pp_double[k-1][k-1]-h->ptr.pp_double[k][k], _state), _state);
31871                     s = aa+ab;
31872                     if( ae_fp_less_eq(ba*(ab/s),ae_maxreal(smlnum, ulp*(bb*(aa/s)), _state)) )
31873                     {
31874                         break;
31875                     }
31876                 }
31877             }
31878             l = k;
31879             if( l>ilo )
31880             {
31881 
31882                 /*
31883                  * H(L,L-1) is negligible
31884                  */
31885                 h->ptr.pp_double[l][l-1] = (double)(0);
31886             }
31887 
31888             /*
31889              * Exit from loop if a submatrix of order 1 or 2 has split off.
31890              */
31891             if( l>=i-1 )
31892             {
31893                 failflag = ae_false;
31894                 break;
31895             }
31896 
31897             /*
31898              * Now the active submatrix is in rows and columns L to I. If
31899              * eigenvalues only are being computed, only the active submatrix
31900              * need be transformed.
31901              */
31902             if( !wantt )
31903             {
31904                 i1 = l;
31905                 i2 = i;
31906             }
31907 
31908             /*
31909              * Shifts
31910              */
31911             if( its==10 )
31912             {
31913 
31914                 /*
31915                  * Exceptional shift.
31916                  */
31917                 s = ae_fabs(h->ptr.pp_double[l+1][l], _state)+ae_fabs(h->ptr.pp_double[l+2][l+1], _state);
31918                 h11 = dat1*s+h->ptr.pp_double[l][l];
31919                 h12 = dat2*s;
31920                 h21 = s;
31921                 h22 = h11;
31922             }
31923             else
31924             {
31925                 if( its==20 )
31926                 {
31927 
31928                     /*
31929                      * Exceptional shift.
31930                      */
31931                     s = ae_fabs(h->ptr.pp_double[i][i-1], _state)+ae_fabs(h->ptr.pp_double[i-1][i-2], _state);
31932                     h11 = dat1*s+h->ptr.pp_double[i][i];
31933                     h12 = dat2*s;
31934                     h21 = s;
31935                     h22 = h11;
31936                 }
31937                 else
31938                 {
31939 
31940                     /*
31941                      * Prepare to use Francis' double shift
31942                      * (i.e. 2nd degree generalized Rayleigh quotient)
31943                      */
31944                     h11 = h->ptr.pp_double[i-1][i-1];
31945                     h21 = h->ptr.pp_double[i][i-1];
31946                     h12 = h->ptr.pp_double[i-1][i];
31947                     h22 = h->ptr.pp_double[i][i];
31948                 }
31949             }
31950             s = ae_fabs(h11, _state)+ae_fabs(h12, _state)+ae_fabs(h21, _state)+ae_fabs(h22, _state);
31951             if( ae_fp_eq(s,(double)(0)) )
31952             {
31953                 rt1r = (double)(0);
31954                 rt1i = (double)(0);
31955                 rt2r = (double)(0);
31956                 rt2i = (double)(0);
31957             }
31958             else
31959             {
31960                 h11 = h11/s;
31961                 h21 = h21/s;
31962                 h12 = h12/s;
31963                 h22 = h22/s;
31964                 tr = (h11+h22)/2;
31965                 det = (h11-tr)*(h22-tr)-h12*h21;
31966                 rtdisc = ae_sqrt(ae_fabs(det, _state), _state);
31967                 if( ae_fp_greater_eq(det,(double)(0)) )
31968                 {
31969 
31970                     /*
31971                      * ==== complex conjugate shifts ====
31972                      */
31973                     rt1r = tr*s;
31974                     rt2r = rt1r;
31975                     rt1i = rtdisc*s;
31976                     rt2i = -rt1i;
31977                 }
31978                 else
31979                 {
31980 
31981                     /*
31982                      * ==== real shifts (use only one of them)  ====
31983                      */
31984                     rt1r = tr+rtdisc;
31985                     rt2r = tr-rtdisc;
31986                     if( ae_fp_less_eq(ae_fabs(rt1r-h22, _state),ae_fabs(rt2r-h22, _state)) )
31987                     {
31988                         rt1r = rt1r*s;
31989                         rt2r = rt1r;
31990                     }
31991                     else
31992                     {
31993                         rt2r = rt2r*s;
31994                         rt1r = rt2r;
31995                     }
31996                     rt1i = (double)(0);
31997                     rt2i = (double)(0);
31998                 }
31999             }
32000 
32001             /*
32002              * Look for two consecutive small subdiagonal elements.
32003              */
32004             for(m=i-2; m>=l; m--)
32005             {
32006 
32007                 /*
32008                  * Determine the effect of starting the double-shift QR
32009                  * iteration at row M, and see if this would make H(M,M-1)
32010                  * negligible.  (The following uses scaling to avoid
32011                  * overflows and most underflows.)
32012                  */
32013                 h21s = h->ptr.pp_double[m+1][m];
32014                 s = ae_fabs(h->ptr.pp_double[m][m]-rt2r, _state)+ae_fabs(rt2i, _state)+ae_fabs(h21s, _state);
32015                 h21s = h->ptr.pp_double[m+1][m]/s;
32016                 workv3->ptr.p_double[1] = h21s*h->ptr.pp_double[m][m+1]+(h->ptr.pp_double[m][m]-rt1r)*((h->ptr.pp_double[m][m]-rt2r)/s)-rt1i*(rt2i/s);
32017                 workv3->ptr.p_double[2] = h21s*(h->ptr.pp_double[m][m]+h->ptr.pp_double[m+1][m+1]-rt1r-rt2r);
32018                 workv3->ptr.p_double[3] = h21s*h->ptr.pp_double[m+2][m+1];
32019                 s = ae_fabs(workv3->ptr.p_double[1], _state)+ae_fabs(workv3->ptr.p_double[2], _state)+ae_fabs(workv3->ptr.p_double[3], _state);
32020                 workv3->ptr.p_double[1] = workv3->ptr.p_double[1]/s;
32021                 workv3->ptr.p_double[2] = workv3->ptr.p_double[2]/s;
32022                 workv3->ptr.p_double[3] = workv3->ptr.p_double[3]/s;
32023                 if( m==l )
32024                 {
32025                     break;
32026                 }
32027                 if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[m][m-1], _state)*(ae_fabs(workv3->ptr.p_double[2], _state)+ae_fabs(workv3->ptr.p_double[3], _state)),ulp*ae_fabs(workv3->ptr.p_double[1], _state)*(ae_fabs(h->ptr.pp_double[m-1][m-1], _state)+ae_fabs(h->ptr.pp_double[m][m], _state)+ae_fabs(h->ptr.pp_double[m+1][m+1], _state))) )
32028                 {
32029                     break;
32030                 }
32031             }
32032 
32033             /*
32034              * Double-shift QR step
32035              */
32036             for(k=m; k<=i-1; k++)
32037             {
32038 
32039                 /*
32040                  * The first iteration of this loop determines a reflection G
32041                  * from the vector V and applies it from left and right to H,
32042                  * thus creating a nonzero bulge below the subdiagonal.
32043                  *
32044                  * Each subsequent iteration determines a reflection G to
32045                  * restore the Hessenberg form in the (K-1)th column, and thus
32046                  * chases the bulge one step toward the bottom of the active
32047                  * submatrix. NR is the order of G.
32048                  */
32049                 nr = ae_minint(3, i-k+1, _state);
32050                 if( k>m )
32051                 {
32052                     for(p1=1; p1<=nr; p1++)
32053                     {
32054                         workv3->ptr.p_double[p1] = h->ptr.pp_double[k+p1-1][k-1];
32055                     }
32056                 }
32057                 generatereflection(workv3, nr, &t1, _state);
32058                 if( k>m )
32059                 {
32060                     h->ptr.pp_double[k][k-1] = workv3->ptr.p_double[1];
32061                     h->ptr.pp_double[k+1][k-1] = (double)(0);
32062                     if( k<i-1 )
32063                     {
32064                         h->ptr.pp_double[k+2][k-1] = (double)(0);
32065                     }
32066                 }
32067                 else
32068                 {
32069                     if( m>l )
32070                     {
32071 
32072                         /*
32073                          * ==== Use the following instead of
32074                          * H( K, K-1 ) = -H( K, K-1 ) to
32075                          * avoid a bug when v(2) and v(3)
32076                          * underflow. ====
32077                          */
32078                         h->ptr.pp_double[k][k-1] = h->ptr.pp_double[k][k-1]*(1-t1);
32079                     }
32080                 }
32081                 v2 = workv3->ptr.p_double[2];
32082                 t2 = t1*v2;
32083                 if( nr==3 )
32084                 {
32085                     v3 = workv3->ptr.p_double[3];
32086                     t3 = t1*v3;
32087 
32088                     /*
32089                      * Apply G from the left to transform the rows of the matrix
32090                      * in columns K to I2.
32091                      */
32092                     for(j=k; j<=i2; j++)
32093                     {
32094                         sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j]+v3*h->ptr.pp_double[k+2][j];
32095                         h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
32096                         h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
32097                         h->ptr.pp_double[k+2][j] = h->ptr.pp_double[k+2][j]-sum*t3;
32098                     }
32099 
32100                     /*
32101                      * Apply G from the right to transform the columns of the
32102                      * matrix in rows I1 to min(K+3,I).
32103                      */
32104                     for(j=i1; j<=ae_minint(k+3, i, _state); j++)
32105                     {
32106                         sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1]+v3*h->ptr.pp_double[j][k+2];
32107                         h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
32108                         h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
32109                         h->ptr.pp_double[j][k+2] = h->ptr.pp_double[j][k+2]-sum*t3;
32110                     }
32111                     if( wantz )
32112                     {
32113 
32114                         /*
32115                          * Accumulate transformations in the matrix Z
32116                          */
32117                         for(j=iloz; j<=ihiz; j++)
32118                         {
32119                             sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1]+v3*z->ptr.pp_double[j][k+2];
32120                             z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
32121                             z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
32122                             z->ptr.pp_double[j][k+2] = z->ptr.pp_double[j][k+2]-sum*t3;
32123                         }
32124                     }
32125                 }
32126                 else
32127                 {
32128                     if( nr==2 )
32129                     {
32130 
32131                         /*
32132                          * Apply G from the left to transform the rows of the matrix
32133                          * in columns K to I2.
32134                          */
32135                         for(j=k; j<=i2; j++)
32136                         {
32137                             sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j];
32138                             h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
32139                             h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
32140                         }
32141 
32142                         /*
32143                          * Apply G from the right to transform the columns of the
32144                          * matrix in rows I1 to min(K+3,I).
32145                          */
32146                         for(j=i1; j<=i; j++)
32147                         {
32148                             sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1];
32149                             h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
32150                             h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
32151                         }
32152                         if( wantz )
32153                         {
32154 
32155                             /*
32156                              * Accumulate transformations in the matrix Z
32157                              */
32158                             for(j=iloz; j<=ihiz; j++)
32159                             {
32160                                 sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1];
32161                                 z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
32162                                 z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
32163                             }
32164                         }
32165                     }
32166                 }
32167             }
32168         }
32169 
32170         /*
32171          * Failure to converge in remaining number of iterations
32172          */
32173         if( failflag )
32174         {
32175             *info = i;
32176             return;
32177         }
32178 
32179         /*
32180          * Convergence
32181          */
32182         if( l==i )
32183         {
32184 
32185             /*
32186              * H(I,I-1) is negligible: one eigenvalue has converged.
32187              */
32188             wr->ptr.p_double[i] = h->ptr.pp_double[i][i];
32189             wi->ptr.p_double[i] = (double)(0);
32190         }
32191         else
32192         {
32193             if( l==i-1 )
32194             {
32195 
32196                 /*
32197                  * H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
32198                  *
32199                  * Transform the 2-by-2 submatrix to standard Schur form,
32200                  * and compute and store the eigenvalues.
32201                  */
32202                 him1im1 = h->ptr.pp_double[i-1][i-1];
32203                 him1i = h->ptr.pp_double[i-1][i];
32204                 hiim1 = h->ptr.pp_double[i][i-1];
32205                 hii = h->ptr.pp_double[i][i];
32206                 hsschur_aux2x2schur(&him1im1, &him1i, &hiim1, &hii, &wrim1, &wiim1, &wri, &wii, &cs, &sn, _state);
32207                 wr->ptr.p_double[i-1] = wrim1;
32208                 wi->ptr.p_double[i-1] = wiim1;
32209                 wr->ptr.p_double[i] = wri;
32210                 wi->ptr.p_double[i] = wii;
32211                 h->ptr.pp_double[i-1][i-1] = him1im1;
32212                 h->ptr.pp_double[i-1][i] = him1i;
32213                 h->ptr.pp_double[i][i-1] = hiim1;
32214                 h->ptr.pp_double[i][i] = hii;
32215                 if( wantt )
32216                 {
32217 
32218                     /*
32219                      * Apply the transformation to the rest of H.
32220                      */
32221                     if( i2>i )
32222                     {
32223                         workc1->ptr.p_double[1] = cs;
32224                         works1->ptr.p_double[1] = sn;
32225                         applyrotationsfromtheleft(ae_true, i-1, i, i+1, i2, workc1, works1, h, work, _state);
32226                     }
32227                     workc1->ptr.p_double[1] = cs;
32228                     works1->ptr.p_double[1] = sn;
32229                     applyrotationsfromtheright(ae_true, i1, i-2, i-1, i, workc1, works1, h, work, _state);
32230                 }
32231                 if( wantz )
32232                 {
32233 
32234                     /*
32235                      * Apply the transformation to Z.
32236                      */
32237                     workc1->ptr.p_double[1] = cs;
32238                     works1->ptr.p_double[1] = sn;
32239                     applyrotationsfromtheright(ae_true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work, _state);
32240                 }
32241             }
32242         }
32243 
32244         /*
32245          * return to start of the main loop with new value of I.
32246          */
32247         i = l-1;
32248     }
32249 }
32250 
32251 
/*************************************************************************
Computes the Schur factorization of a real 2-by-2 nonsymmetric matrix

    [ A  B ]
    [ C  D ]

in standard form. On exit the matrix is either upper triangular (C=0,
real eigenvalues on the diagonal) or has equal diagonal elements A=D
(complex conjugate pair of eigenvalues). The plane rotation that was
applied is returned in (CS,SN), and the eigenvalues are stored into
(RT1R,RT1I) and (RT2R,RT2I).

A, B, C, D are in-out parameters: they hold the transformed matrix on
exit. This routine mirrors LAPACK's DLANV2.
*************************************************************************/
static void hsschur_aux2x2schur(double* a,
     double* b,
     double* c,
     double* d,
     double* rt1r,
     double* rt1i,
     double* rt2r,
     double* rt2i,
     double* cs,
     double* sn,
     ae_state *_state)
{
    double multpl;
    double aa;
    double bb;
    double bcmax;
    double bcmis;
    double cc;
    double cs1;
    double dd;
    double eps;
    double p;
    double sab;
    double sac;
    double scl;
    double sigma;
    double sn1;
    double tau;
    double temp;
    double z;

    *rt1r = 0;
    *rt1i = 0;
    *rt2r = 0;
    *rt2i = 0;
    *cs = 0;
    *sn = 0;

    /*
     * MULTPL*EPS is the threshold below which the scaled discriminant Z
     * is considered "of the order of machine accuracy" (see below).
     */
    multpl = 4.0;
    eps = ae_machineepsilon;
    if( ae_fp_eq(*c,(double)(0)) )
    {

        /*
         * Matrix is already upper triangular: identity rotation.
         */
        *cs = (double)(1);
        *sn = (double)(0);
    }
    else
    {
        if( ae_fp_eq(*b,(double)(0)) )
        {

            /*
             * Lower triangular matrix: swap rows and columns
             * (90-degree rotation) to make it upper triangular.
             */
            *cs = (double)(0);
            *sn = (double)(1);
            temp = *d;
            *d = *a;
            *a = temp;
            *b = -*c;
            *c = (double)(0);
        }
        else
        {
            if( ae_fp_eq(*a-(*d),(double)(0))&&hsschur_extschursigntoone(*b, _state)!=hsschur_extschursigntoone(*c, _state) )
            {

                /*
                 * Equal diagonal and B*C<0: already in standard form for a
                 * complex conjugate pair, no rotation needed.
                 */
                *cs = (double)(1);
                *sn = (double)(0);
            }
            else
            {

                /*
                 * General case. Compute the scaled discriminant
                 * Z ~ (P^2+B*C)/SCL where P=(A-D)/2; scaling by SCL avoids
                 * overflow/underflow in the intermediate products.
                 */
                temp = *a-(*d);
                p = 0.5*temp;
                bcmax = ae_maxreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state);
                bcmis = ae_minreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state)*hsschur_extschursigntoone(*b, _state)*hsschur_extschursigntoone(*c, _state);
                scl = ae_maxreal(ae_fabs(p, _state), bcmax, _state);
                z = p/scl*p+bcmax/scl*bcmis;

                /*
                 * If Z is of the order of the machine accuracy, postpone the
                 * decision on the nature of eigenvalues
                 */
                if( ae_fp_greater_eq(z,multpl*eps) )
                {

                    /*
                     * Real eigenvalues. Compute A and D.
                     */
                    z = p+hsschur_extschursign(ae_sqrt(scl, _state)*ae_sqrt(z, _state), p, _state);
                    *a = *d+z;
                    *d = *d-bcmax/z*bcmis;

                    /*
                     * Compute B and the rotation matrix
                     */
                    tau = pythag2(*c, z, _state);
                    *cs = z/tau;
                    *sn = *c/tau;
                    *b = *b-(*c);
                    *c = (double)(0);
                }
                else
                {

                    /*
                     * Complex eigenvalues, or real (almost) equal eigenvalues.
                     * Make diagonal elements equal.
                     */
                    sigma = *b+(*c);
                    tau = pythag2(sigma, temp, _state);
                    *cs = ae_sqrt(0.5*(1+ae_fabs(sigma, _state)/tau), _state);
                    *sn = -p/(tau*(*cs))*hsschur_extschursign((double)(1), sigma, _state);

                    /*
                     * Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
                     *         [ CC  DD ]   [ C  D ] [ SN  CS ]
                     */
                    aa = *a*(*cs)+*b*(*sn);
                    bb = -*a*(*sn)+*b*(*cs);
                    cc = *c*(*cs)+*d*(*sn);
                    dd = -*c*(*sn)+*d*(*cs);

                    /*
                     * Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
                     *         [ C  D ]   [-SN  CS ] [ CC  DD ]
                     */
                    *a = aa*(*cs)+cc*(*sn);
                    *b = bb*(*cs)+dd*(*sn);
                    *c = -aa*(*sn)+cc*(*cs);
                    *d = -bb*(*sn)+dd*(*cs);
                    temp = 0.5*(*a+(*d));
                    *a = temp;
                    *d = temp;
                    if( ae_fp_neq(*c,(double)(0)) )
                    {
                        if( ae_fp_neq(*b,(double)(0)) )
                        {
                            if( hsschur_extschursigntoone(*b, _state)==hsschur_extschursigntoone(*c, _state) )
                            {

                                /*
                                 * Real eigenvalues: reduce to upper triangular form
                                 */
                                sab = ae_sqrt(ae_fabs(*b, _state), _state);
                                sac = ae_sqrt(ae_fabs(*c, _state), _state);
                                p = hsschur_extschursign(sab*sac, *c, _state);
                                tau = 1/ae_sqrt(ae_fabs(*b+(*c), _state), _state);
                                *a = temp+p;
                                *d = temp-p;
                                *b = *b-(*c);
                                *c = (double)(0);

                                /*
                                 * Compose the extra rotation (CS1,SN1) with the
                                 * one already accumulated in (CS,SN).
                                 */
                                cs1 = sab*tau;
                                sn1 = sac*tau;
                                temp = *cs*cs1-*sn*sn1;
                                *sn = *cs*sn1+*sn*cs1;
                                *cs = temp;
                            }
                        }
                        else
                        {

                            /*
                             * B=0, C!=0: swap to upper triangular by composing
                             * a 90-degree rotation into (CS,SN).
                             */
                            *b = -*c;
                            *c = (double)(0);
                            temp = *cs;
                            *cs = -*sn;
                            *sn = temp;
                        }
                    }
                }
            }
        }
    }

    /*
     * Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
     * If C=0 the result is triangular (real eigenvalues); otherwise the
     * imaginary part is sqrt(|B|)*sqrt(|C|).
     */
    *rt1r = *a;
    *rt2r = *d;
    if( ae_fp_eq(*c,(double)(0)) )
    {
        *rt1i = (double)(0);
        *rt2i = (double)(0);
    }
    else
    {
        *rt1i = ae_sqrt(ae_fabs(*b, _state), _state)*ae_sqrt(ae_fabs(*c, _state), _state);
        *rt2i = -*rt1i;
    }
}
32439 
32440 
hsschur_extschursign(double a,double b,ae_state * _state)32441 static double hsschur_extschursign(double a, double b, ae_state *_state)
32442 {
32443     double result;
32444 
32445 
32446     if( ae_fp_greater_eq(b,(double)(0)) )
32447     {
32448         result = ae_fabs(a, _state);
32449     }
32450     else
32451     {
32452         result = -ae_fabs(a, _state);
32453     }
32454     return result;
32455 }
32456 
32457 
hsschur_extschursigntoone(double b,ae_state * _state)32458 static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state)
32459 {
32460     ae_int_t result;
32461 
32462 
32463     if( ae_fp_greater_eq(b,(double)(0)) )
32464     {
32465         result = 1;
32466     }
32467     else
32468     {
32469         result = -1;
32470     }
32471     return result;
32472 }
32473 
32474 
32475 #endif
32476 #if defined(AE_COMPILE_EVD) || !defined(AE_PARTIAL_BUILD)
32477 
32478 
32479 /*************************************************************************
32480 This function initializes subspace iteration solver. This solver  is  used
32481 to solve symmetric real eigenproblems where just a few (top K) eigenvalues
32482 and corresponding eigenvectors are required.
32483 
32484 This solver can be significantly faster than  complete  EVD  decomposition
32485 in the following case:
32486 * when only a small fraction of the top eigenpairs of a dense matrix is
32487   required. When K approaches N, this solver is slower than complete dense
32488   EVD
32489 * when problem matrix is sparse (and/or is not known explicitly, i.e. only
32490   matrix-matrix product can be performed)
32491 
32492 USAGE (explicit dense/sparse matrix):
32493 1. User initializes algorithm state with eigsubspacecreate() call
32494 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
32495    or other functions
32496 3. User  calls  eigsubspacesolvedense() or eigsubspacesolvesparse() methods,
32497    which take algorithm state and 2D array or alglib.sparsematrix object.
32498 
32499 USAGE (out-of-core mode):
32500 1. User initializes algorithm state with eigsubspacecreate() call
32501 2. [optional] User tunes solver parameters by calling eigsubspacesetcond()
32502    or other functions
32503 3. User activates out-of-core mode of  the  solver  and  repeatedly  calls
32504    communication functions in a loop like below:
32505    > alglib.eigsubspaceoocstart(state)
32506    > while alglib.eigsubspaceooccontinue(state) do
32507    >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32508    >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32509    >     [calculate  Y=A*X, with X=R^NxM]
32510    >     alglib.eigsubspaceoocsendresult(state, in Y)
32511    > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32512 
32513 INPUT PARAMETERS:
32514     N       -   problem dimensionality, N>0
32515     K       -   number of top eigenvectors to calculate, 0<K<=N.
32516 
32517 OUTPUT PARAMETERS:
32518     State   -   structure which stores algorithm state
32519 
32520 NOTE: if you solve many similar EVD problems you may  find  it  useful  to
32521       reuse previous subspace as warm-start point for new EVD problem.  It
32522       can be done with eigsubspacesetwarmstart() function.
32523 
32524   -- ALGLIB --
32525      Copyright 16.01.2017 by Bochkanov Sergey
32526 *************************************************************************/
void eigsubspacecreate(ae_int_t n,
     ae_int_t k,
     eigsubspacestate* state,
     ae_state *_state)
{

    /*
     * Reset the destination structure first so that a previously used
     * State object is returned to a freshly-initialized condition.
     */
    _eigsubspacestate_clear(state);

    /*
     * Validate inputs, then delegate all real work to the buffered
     * constructor (which reuses any memory already held by State).
     */
    ae_assert(n>0, "EigSubspaceCreate: N<=0", _state);
    ae_assert(k>0, "EigSubspaceCreate: K<=0", _state);
    ae_assert(k<=n, "EigSubspaceCreate: K>N", _state);
    eigsubspacecreatebuf(n, k, state, _state);
}
32540 
32541 
32542 /*************************************************************************
32543 Buffered version of constructor which aims to reuse  previously  allocated
32544 memory as much as possible.
32545 
32546   -- ALGLIB --
32547      Copyright 16.01.2017 by Bochkanov Sergey
32548 *************************************************************************/
void eigsubspacecreatebuf(ae_int_t n,
     ae_int_t k,
     eigsubspacestate* state,
     ae_state *_state)
{


    /*
     * NOTE(review): assertion messages reference "EigSubspaceCreate" (the
     * public wrapper) rather than this function's own name — presumably
     * intentional since this is its backend; verify against project style.
     */
    ae_assert(n>0, "EigSubspaceCreate: N<=0", _state);
    ae_assert(k>0, "EigSubspaceCreate: K<=0", _state);
    ae_assert(k<=n, "EigSubspaceCreate: K>N", _state);

    /*
     * Initialize algorithm parameters.
     *
     * NWork is the working subspace width: at least 2*K and at least 8,
     * but never larger than the problem dimension N.
     *
     * Passing eps=0 and maxits=0 to eigsubspacesetcond() selects the
     * automatic default stopping criteria (see eigsubspacesetcond docs).
     */
    state->running = ae_false;
    state->n = n;
    state->k = k;
    state->nwork = ae_minint(ae_maxint(2*k, 8, _state), n, _state);
    state->eigenvectorsneeded = 1;
    state->usewarmstart = ae_false;
    state->firstcall = ae_true;
    eigsubspacesetcond(state, 0.0, 0, _state);

    /*
     * Allocate temporaries (grow-only, so previously allocated buffers
     * of sufficient size are reused).
     */
    rmatrixsetlengthatleast(&state->x, state->n, state->nwork, _state);
    rmatrixsetlengthatleast(&state->ax, state->n, state->nwork, _state);
}
32578 
32579 
32580 /*************************************************************************
32581 This function sets stopping critera for the solver:
32582 * error in eigenvector/value allowed by solver
32583 * maximum number of iterations to perform
32584 
32585 INPUT PARAMETERS:
32586     State       -   solver structure
32587     Eps         -   eps>=0,  with non-zero value used to tell solver  that
32588                     it can  stop  after  all  eigenvalues  converged  with
32589                     error  roughly  proportional  to  eps*MAX(LAMBDA_MAX),
32590                     where LAMBDA_MAX is a maximum eigenvalue.
32591                     Zero  value  means  that  no  check  for  precision is
32592                     performed.
32593     MaxIts      -   maxits>=0,  with non-zero value used  to  tell  solver
32594                     that it can stop after maxits  steps  (no  matter  how
32595                     precise current estimate is)
32596 
32597 NOTE: passing  eps=0  and  maxits=0  results  in  automatic  selection  of
32598       moderate eps as stopping criteria (1.0E-6 in current implementation,
32599       but it may change without notice).
32600 
32601 NOTE: very small values of eps are possible (say, 1.0E-12),  although  the
32602       larger problem you solve (N and/or K), the  harder  it  is  to  find
32603       precise eigenvectors because rounding errors tend to accumulate.
32604 
32605 NOTE: passing non-zero eps results in  some performance  penalty,  roughly
32606       equal to 2N*(2K)^2 FLOPs per iteration. These additional computations
32607       are required in order to estimate current error in  eigenvalues  via
32608       Rayleigh-Ritz process.
32609       Most of this additional time is  spent  in  construction  of  ~2Kx2K
32610       symmetric  subproblem  whose  eigenvalues  are  checked  with  exact
32611       eigensolver.
32612       This additional time is negligible if you search for eigenvalues  of
32613       the large dense matrix, but may become noticeable on  highly  sparse
32614       EVD problems, where cost of matrix-matrix product is low.
32615       If you set eps to exactly zero,  Rayleigh-Ritz  phase  is completely
32616       turned off.
32617 
32618   -- ALGLIB --
32619      Copyright 16.01.2017 by Bochkanov Sergey
32620 *************************************************************************/
eigsubspacesetcond(eigsubspacestate * state,double eps,ae_int_t maxits,ae_state * _state)32621 void eigsubspacesetcond(eigsubspacestate* state,
32622      double eps,
32623      ae_int_t maxits,
32624      ae_state *_state)
32625 {
32626 
32627 
32628     ae_assert(!state->running, "EigSubspaceSetCond: solver is already running", _state);
32629     ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "EigSubspaceSetCond: Eps<0 or NAN/INF", _state);
32630     ae_assert(maxits>=0, "EigSubspaceSetCond: MaxIts<0", _state);
32631     if( ae_fp_eq(eps,(double)(0))&&maxits==0 )
32632     {
32633         eps = 1.0E-6;
32634     }
32635     state->eps = eps;
32636     state->maxits = maxits;
32637 }
32638 
32639 
32640 /*************************************************************************
32641 This function sets warm-start mode of the solver: next call to the  solver
32642 will reuse previous subspace as warm-start  point.  It  can  significantly
32643 speed-up convergence when you solve many similar eigenproblems.
32644 
32645 INPUT PARAMETERS:
32646     State       -   solver structure
32647     UseWarmStart-   either True or False
32648 
32649   -- ALGLIB --
32650      Copyright 12.11.2017 by Bochkanov Sergey
32651 *************************************************************************/
eigsubspacesetwarmstart(eigsubspacestate * state,ae_bool usewarmstart,ae_state * _state)32652 void eigsubspacesetwarmstart(eigsubspacestate* state,
32653      ae_bool usewarmstart,
32654      ae_state *_state)
32655 {
32656 
32657 
32658     ae_assert(!state->running, "EigSubspaceSetWarmStart: solver is already running", _state);
32659     state->usewarmstart = usewarmstart;
32660 }
32661 
32662 
32663 /*************************************************************************
32664 This  function  initiates  out-of-core  mode  of  subspace eigensolver. It
32665 should be used in conjunction with other out-of-core-related functions  of
32666 this subspackage in a loop like below:
32667 
32668 > alglib.eigsubspaceoocstart(state)
32669 > while alglib.eigsubspaceooccontinue(state) do
32670 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32671 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32672 >     [calculate  Y=A*X, with X=R^NxM]
32673 >     alglib.eigsubspaceoocsendresult(state, in Y)
32674 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32675 
32676 INPUT PARAMETERS:
32677     State       -   solver object
32678     MType       -   matrix type:
32679                     * 0 for real  symmetric  matrix  (solver  assumes that
32680                       matrix  being   processed  is  symmetric;  symmetric
32681                       direct eigensolver is used for  smaller  subproblems
32682                       arising during solution of larger "full" task)
32683                     Future versions of ALGLIB may  introduce  support  for
32684                     other  matrix   types;   for   now,   only   symmetric
32685                     eigenproblems are supported.
32686 
32687 
32688   -- ALGLIB --
32689      Copyright 16.01.2017 by Bochkanov Sergey
32690 *************************************************************************/
eigsubspaceoocstart(eigsubspacestate * state,ae_int_t mtype,ae_state * _state)32691 void eigsubspaceoocstart(eigsubspacestate* state,
32692      ae_int_t mtype,
32693      ae_state *_state)
32694 {
32695 
32696 
32697     ae_assert(!state->running, "EigSubspaceStart: solver is already running", _state);
32698     ae_assert(mtype==0, "EigSubspaceStart: incorrect mtype parameter", _state);
32699     ae_vector_set_length(&state->rstate.ia, 7+1, _state);
32700     ae_vector_set_length(&state->rstate.ra, 1+1, _state);
32701     state->rstate.stage = -1;
32702     evd_clearrfields(state, _state);
32703     state->running = ae_true;
32704     state->matrixtype = mtype;
32705 }
32706 
32707 
32708 /*************************************************************************
32709 This function performs subspace iteration  in  the  out-of-core  mode.  It
32710 should be used in conjunction with other out-of-core-related functions  of
32711 this subspackage in a loop like below:
32712 
32713 > alglib.eigsubspaceoocstart(state)
32714 > while alglib.eigsubspaceooccontinue(state) do
32715 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32716 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32717 >     [calculate  Y=A*X, with X=R^NxM]
32718 >     alglib.eigsubspaceoocsendresult(state, in Y)
32719 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32720 
32721 
32722   -- ALGLIB --
32723      Copyright 16.01.2017 by Bochkanov Sergey
32724 *************************************************************************/
eigsubspaceooccontinue(eigsubspacestate * state,ae_state * _state)32725 ae_bool eigsubspaceooccontinue(eigsubspacestate* state, ae_state *_state)
32726 {
32727     ae_bool result;
32728 
32729 
32730     ae_assert(state->running, "EigSubspaceContinue: solver is not running", _state);
32731     result = eigsubspaceiteration(state, _state);
32732     state->running = result;
32733     return result;
32734 }
32735 
32736 
32737 /*************************************************************************
32738 This function is used to retrieve information  about  out-of-core  request
32739 sent by solver to user code: request type (current version  of  the solver
32740 sends only requests for matrix-matrix products) and request size (size  of
32741 the matrices being multiplied).
32742 
32743 This function returns just request metrics; in order  to  get contents  of
32744 the matrices being multiplied, use eigsubspaceoocgetrequestdata().
32745 
32746 It should be used in conjunction with other out-of-core-related  functions
32747 of this subspackage in a loop like below:
32748 
32749 > alglib.eigsubspaceoocstart(state)
32750 > while alglib.eigsubspaceooccontinue(state) do
32751 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32752 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32753 >     [calculate  Y=A*X, with X=R^NxM]
32754 >     alglib.eigsubspaceoocsendresult(state, in Y)
32755 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32756 
32757 INPUT PARAMETERS:
32758     State           -   solver running in out-of-core mode
32759 
32760 OUTPUT PARAMETERS:
32761     RequestType     -   type of the request to process:
32762                         * 0 - for matrix-matrix product A*X, with A  being
32763                           NxN matrix whose eigenvalues/vectors are needed,
32764                           and X being NxREQUESTSIZE one which is  returned
32765                           by the eigsubspaceoocgetrequestdata().
32766     RequestSize     -   size of the X matrix (number of columns),  usually
32767                         it is several times larger than number of  vectors
32768                         K requested by user.
32769 
32770 
32771   -- ALGLIB --
32772      Copyright 16.01.2017 by Bochkanov Sergey
32773 *************************************************************************/
eigsubspaceoocgetrequestinfo(eigsubspacestate * state,ae_int_t * requesttype,ae_int_t * requestsize,ae_state * _state)32774 void eigsubspaceoocgetrequestinfo(eigsubspacestate* state,
32775      ae_int_t* requesttype,
32776      ae_int_t* requestsize,
32777      ae_state *_state)
32778 {
32779 
32780     *requesttype = 0;
32781     *requestsize = 0;
32782 
32783     ae_assert(state->running, "EigSubspaceOOCGetRequestInfo: solver is not running", _state);
32784     *requesttype = state->requesttype;
32785     *requestsize = state->requestsize;
32786 }
32787 
32788 
32789 /*************************************************************************
32790 This function is used to retrieve information  about  out-of-core  request
32791 sent by solver to user code: matrix X (array[N,RequestSize) which have  to
32792 be multiplied by out-of-core matrix A in a product A*X.
32793 
32794 This function returns just request data; in order to get size of  the data
32795 prior to processing requestm, use eigsubspaceoocgetrequestinfo().
32796 
32797 It should be used in conjunction with other out-of-core-related  functions
32798 of this subspackage in a loop like below:
32799 
32800 > alglib.eigsubspaceoocstart(state)
32801 > while alglib.eigsubspaceooccontinue(state) do
32802 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32803 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32804 >     [calculate  Y=A*X, with X=R^NxM]
32805 >     alglib.eigsubspaceoocsendresult(state, in Y)
32806 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32807 
32808 INPUT PARAMETERS:
32809     State           -   solver running in out-of-core mode
32810     X               -   possibly  preallocated   storage;  reallocated  if
32811                         needed, left unchanged, if large enough  to  store
32812                         request data.
32813 
32814 OUTPUT PARAMETERS:
32815     X               -   array[N,RequestSize] or larger, leading  rectangle
32816                         is filled with dense matrix X.
32817 
32818 
32819   -- ALGLIB --
32820      Copyright 16.01.2017 by Bochkanov Sergey
32821 *************************************************************************/
eigsubspaceoocgetrequestdata(eigsubspacestate * state,ae_matrix * x,ae_state * _state)32822 void eigsubspaceoocgetrequestdata(eigsubspacestate* state,
32823      /* Real    */ ae_matrix* x,
32824      ae_state *_state)
32825 {
32826     ae_int_t i;
32827     ae_int_t j;
32828 
32829 
32830     ae_assert(state->running, "EigSubspaceOOCGetRequestInfo: solver is not running", _state);
32831     rmatrixsetlengthatleast(x, state->n, state->requestsize, _state);
32832     for(i=0; i<=state->n-1; i++)
32833     {
32834         for(j=0; j<=state->requestsize-1; j++)
32835         {
32836             x->ptr.pp_double[i][j] = state->x.ptr.pp_double[i][j];
32837         }
32838     }
32839 }
32840 
32841 
32842 /*************************************************************************
32843 This function is used to send user reply to out-of-core  request  sent  by
32844 solver. Usually it is product A*X for returned by solver matrix X.
32845 
32846 It should be used in conjunction with other out-of-core-related  functions
32847 of this subspackage in a loop like below:
32848 
32849 > alglib.eigsubspaceoocstart(state)
32850 > while alglib.eigsubspaceooccontinue(state) do
32851 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32852 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32853 >     [calculate  Y=A*X, with X=R^NxM]
32854 >     alglib.eigsubspaceoocsendresult(state, in Y)
32855 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32856 
32857 INPUT PARAMETERS:
32858     State           -   solver running in out-of-core mode
32859     AX              -   array[N,RequestSize] or larger, leading  rectangle
32860                         is filled with product A*X.
32861 
32862 
32863   -- ALGLIB --
32864      Copyright 16.01.2017 by Bochkanov Sergey
32865 *************************************************************************/
eigsubspaceoocsendresult(eigsubspacestate * state,ae_matrix * ax,ae_state * _state)32866 void eigsubspaceoocsendresult(eigsubspacestate* state,
32867      /* Real    */ ae_matrix* ax,
32868      ae_state *_state)
32869 {
32870     ae_int_t i;
32871     ae_int_t j;
32872 
32873 
32874     ae_assert(state->running, "EigSubspaceOOCGetRequestInfo: solver is not running", _state);
32875     for(i=0; i<=state->n-1; i++)
32876     {
32877         for(j=0; j<=state->requestsize-1; j++)
32878         {
32879             state->ax.ptr.pp_double[i][j] = ax->ptr.pp_double[i][j];
32880         }
32881     }
32882 }
32883 
32884 
32885 /*************************************************************************
32886 This  function  finalizes out-of-core  mode  of  subspace eigensolver.  It
32887 should be used in conjunction with other out-of-core-related functions  of
32888 this subspackage in a loop like below:
32889 
32890 > alglib.eigsubspaceoocstart(state)
32891 > while alglib.eigsubspaceooccontinue(state) do
32892 >     alglib.eigsubspaceoocgetrequestinfo(state, out RequestType, out M)
32893 >     alglib.eigsubspaceoocgetrequestdata(state, out X)
32894 >     [calculate  Y=A*X, with X=R^NxM]
32895 >     alglib.eigsubspaceoocsendresult(state, in Y)
32896 > alglib.eigsubspaceoocstop(state, out W, out Z, out Report)
32897 
32898 INPUT PARAMETERS:
32899     State       -   solver state
32900 
32901 OUTPUT PARAMETERS:
32902     W           -   array[K], depending on solver settings:
32903                     * top  K  eigenvalues ordered  by  descending   -   if
32904                       eigenvectors are returned in Z
32905                     * zeros - if invariant subspace is returned in Z
32906     Z           -   array[N,K], depending on solver settings either:
32907                     * matrix of eigenvectors found
32908                     * orthogonal basis of K-dimensional invariant subspace
32909     Rep         -   report with additional parameters
32910 
32911   -- ALGLIB --
32912      Copyright 16.01.2017 by Bochkanov Sergey
32913 *************************************************************************/
eigsubspaceoocstop(eigsubspacestate * state,ae_vector * w,ae_matrix * z,eigsubspacereport * rep,ae_state * _state)32914 void eigsubspaceoocstop(eigsubspacestate* state,
32915      /* Real    */ ae_vector* w,
32916      /* Real    */ ae_matrix* z,
32917      eigsubspacereport* rep,
32918      ae_state *_state)
32919 {
32920     ae_int_t n;
32921     ae_int_t k;
32922     ae_int_t i;
32923     ae_int_t j;
32924 
32925     ae_vector_clear(w);
32926     ae_matrix_clear(z);
32927     _eigsubspacereport_clear(rep);
32928 
32929     ae_assert(!state->running, "EigSubspaceStop: solver is still running", _state);
32930     n = state->n;
32931     k = state->k;
32932     ae_vector_set_length(w, k, _state);
32933     ae_matrix_set_length(z, n, k, _state);
32934     for(i=0; i<=k-1; i++)
32935     {
32936         w->ptr.p_double[i] = state->rw.ptr.p_double[i];
32937     }
32938     for(i=0; i<=n-1; i++)
32939     {
32940         for(j=0; j<=k-1; j++)
32941         {
32942             z->ptr.pp_double[i][j] = state->rq.ptr.pp_double[i][j];
32943         }
32944     }
32945     rep->iterationscount = state->repiterationscount;
32946 }
32947 
32948 
32949 /*************************************************************************
32950 This  function runs subspace eigensolver for dense NxN symmetric matrix A,
32951 given by its upper or lower triangle.
32952 
32953 This function can not process nonsymmetric matrices.
32954 
32955 INPUT PARAMETERS:
32956     State       -   solver state
32957     A           -   array[N,N], symmetric NxN matrix given by one  of  its
32958                     triangles
32959     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
32960                     other one is not referenced at all).
32961 
32962 OUTPUT PARAMETERS:
32963     W           -   array[K], top  K  eigenvalues ordered  by   descending
32964                     of their absolute values
32965     Z           -   array[N,K], matrix of eigenvectors found
32966     Rep         -   report with additional parameters
32967 
32968 NOTE: internally this function allocates a copy of NxN dense A. You should
32969       take it into account when working with very large matrices occupying
32970       almost all RAM.
32971 
32972   ! FREE EDITION OF ALGLIB:
32973   !
32974   ! Free Edition of ALGLIB supports following important features for  this
32975   ! function:
32976   ! * C++ version: x64 SIMD support using C++ intrinsics
32977   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
32978   !
32979   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
32980   ! Reference Manual in order  to  find  out  how to activate SIMD support
32981   ! in ALGLIB.
32982 
32983   ! COMMERCIAL EDITION OF ALGLIB:
32984   !
32985   ! Commercial Edition of ALGLIB includes following important improvements
32986   ! of this function:
32987   ! * high-performance native backend with same C# interface (C# version)
32988   ! * multithreading support (C++ and C# versions)
32989   ! * hardware vendor (Intel) implementations of linear algebra primitives
32990   !   (C++ and C# versions, x86/x64 platform)
32991   !
32992   ! We recommend you to read 'Working with commercial version' section  of
32993   ! ALGLIB Reference Manual in order to find out how to  use  performance-
32994   ! related features provided by commercial edition of ALGLIB.
32995 
32996   -- ALGLIB --
32997      Copyright 16.01.2017 by Bochkanov Sergey
32998 *************************************************************************/
eigsubspacesolvedenses(eigsubspacestate * state,ae_matrix * a,ae_bool isupper,ae_vector * w,ae_matrix * z,eigsubspacereport * rep,ae_state * _state)32999 void eigsubspacesolvedenses(eigsubspacestate* state,
33000      /* Real    */ ae_matrix* a,
33001      ae_bool isupper,
33002      /* Real    */ ae_vector* w,
33003      /* Real    */ ae_matrix* z,
33004      eigsubspacereport* rep,
33005      ae_state *_state)
33006 {
33007     ae_frame _frame_block;
33008     ae_int_t n;
33009     ae_int_t m;
33010     ae_int_t i;
33011     ae_int_t j;
33012     ae_int_t k;
33013     double v;
33014     ae_matrix acopy;
33015 
33016     ae_frame_make(_state, &_frame_block);
33017     memset(&acopy, 0, sizeof(acopy));
33018     ae_vector_clear(w);
33019     ae_matrix_clear(z);
33020     _eigsubspacereport_clear(rep);
33021     ae_matrix_init(&acopy, 0, 0, DT_REAL, _state, ae_true);
33022 
33023     ae_assert(!state->running, "EigSubspaceSolveDenseS: solver is still running", _state);
33024     n = state->n;
33025 
33026     /*
33027      * Allocate copy of A, copy one triangle to another
33028      */
33029     ae_matrix_set_length(&acopy, n, n, _state);
33030     for(i=0; i<=n-1; i++)
33031     {
33032         for(j=i; j<=n-1; j++)
33033         {
33034             if( isupper )
33035             {
33036                 v = a->ptr.pp_double[i][j];
33037             }
33038             else
33039             {
33040                 v = a->ptr.pp_double[j][i];
33041             }
33042             acopy.ptr.pp_double[i][j] = v;
33043             acopy.ptr.pp_double[j][i] = v;
33044         }
33045     }
33046 
33047     /*
33048      * Start iterations
33049      */
33050     state->matrixtype = 0;
33051     ae_vector_set_length(&state->rstate.ia, 7+1, _state);
33052     ae_vector_set_length(&state->rstate.ra, 1+1, _state);
33053     state->rstate.stage = -1;
33054     evd_clearrfields(state, _state);
33055     while(eigsubspaceiteration(state, _state))
33056     {
33057 
33058         /*
33059          * Calculate A*X with RMatrixGEMM
33060          */
33061         ae_assert(state->requesttype==0, "EigSubspaceSolveDense: integrity check failed", _state);
33062         ae_assert(state->requestsize>0, "EigSubspaceSolveDense: integrity check failed", _state);
33063         m = state->requestsize;
33064         rmatrixgemm(n, m, n, 1.0, &acopy, 0, 0, 0, &state->x, 0, 0, 0, 0.0, &state->ax, 0, 0, _state);
33065     }
33066     k = state->k;
33067     ae_vector_set_length(w, k, _state);
33068     ae_matrix_set_length(z, n, k, _state);
33069     for(i=0; i<=k-1; i++)
33070     {
33071         w->ptr.p_double[i] = state->rw.ptr.p_double[i];
33072     }
33073     for(i=0; i<=n-1; i++)
33074     {
33075         for(j=0; j<=k-1; j++)
33076         {
33077             z->ptr.pp_double[i][j] = state->rq.ptr.pp_double[i][j];
33078         }
33079     }
33080     rep->iterationscount = state->repiterationscount;
33081     ae_frame_leave(_state);
33082 }
33083 
33084 
33085 /*************************************************************************
33086 This  function runs eigensolver for dense NxN symmetric matrix A, given by
33087 upper or lower triangle.
33088 
33089 This function can not process nonsymmetric matrices.
33090 
33091 INPUT PARAMETERS:
33092     State       -   solver state
33093     A           -   NxN symmetric matrix given by one of its triangles
33094     IsUpper     -   whether upper or lower triangle of  A  is  given  (the
33095                     other one is not referenced at all).
33096 
33097 OUTPUT PARAMETERS:
33098     W           -   array[K], top  K  eigenvalues ordered  by   descending
33099                     of their absolute values
33100     Z           -   array[N,K], matrix of eigenvectors found
33101     Rep         -   report with additional parameters
33102 
33103   -- ALGLIB --
33104      Copyright 16.01.2017 by Bochkanov Sergey
33105 *************************************************************************/
eigsubspacesolvesparses(eigsubspacestate * state,sparsematrix * a,ae_bool isupper,ae_vector * w,ae_matrix * z,eigsubspacereport * rep,ae_state * _state)33106 void eigsubspacesolvesparses(eigsubspacestate* state,
33107      sparsematrix* a,
33108      ae_bool isupper,
33109      /* Real    */ ae_vector* w,
33110      /* Real    */ ae_matrix* z,
33111      eigsubspacereport* rep,
33112      ae_state *_state)
33113 {
33114     ae_int_t n;
33115     ae_int_t i;
33116     ae_int_t j;
33117     ae_int_t k;
33118 
33119     ae_vector_clear(w);
33120     ae_matrix_clear(z);
33121     _eigsubspacereport_clear(rep);
33122 
33123     ae_assert(!state->running, "EigSubspaceSolveSparseS: solver is still running", _state);
33124     n = state->n;
33125     state->matrixtype = 0;
33126     ae_vector_set_length(&state->rstate.ia, 7+1, _state);
33127     ae_vector_set_length(&state->rstate.ra, 1+1, _state);
33128     state->rstate.stage = -1;
33129     evd_clearrfields(state, _state);
33130     while(eigsubspaceiteration(state, _state))
33131     {
33132         ae_assert(state->requesttype==0, "EigSubspaceSolveDense: integrity check failed", _state);
33133         ae_assert(state->requestsize>0, "EigSubspaceSolveDense: integrity check failed", _state);
33134         sparsesmm(a, isupper, &state->x, state->requestsize, &state->ax, _state);
33135     }
33136     k = state->k;
33137     ae_vector_set_length(w, k, _state);
33138     ae_matrix_set_length(z, n, k, _state);
33139     for(i=0; i<=k-1; i++)
33140     {
33141         w->ptr.p_double[i] = state->rw.ptr.p_double[i];
33142     }
33143     for(i=0; i<=n-1; i++)
33144     {
33145         for(j=0; j<=k-1; j++)
33146         {
33147             z->ptr.pp_double[i][j] = state->rq.ptr.pp_double[i][j];
33148         }
33149     }
33150     rep->iterationscount = state->repiterationscount;
33151 }
33152 
33153 
33154 /*************************************************************************
33155 Internal r-comm function.
33156 
33157   -- ALGLIB --
33158      Copyright 16.01.2017 by Bochkanov Sergey
33159 *************************************************************************/
eigsubspaceiteration(eigsubspacestate * state,ae_state * _state)33160 ae_bool eigsubspaceiteration(eigsubspacestate* state, ae_state *_state)
33161 {
33162     ae_int_t n;
33163     ae_int_t nwork;
33164     ae_int_t k;
33165     ae_int_t cnt;
33166     ae_int_t i;
33167     ae_int_t i1;
33168     ae_int_t j;
33169     double vv;
33170     double v;
33171     ae_int_t convcnt;
33172     ae_bool result;
33173 
33174 
33175 
33176     /*
33177      * Reverse communication preparations
33178      * I know it looks ugly, but it works the same way
33179      * anywhere from C++ to Python.
33180      *
33181      * This code initializes locals by:
33182      * * random values determined during code
33183      *   generation - on first subroutine call
33184      * * values from previous call - on subsequent calls
33185      */
33186     if( state->rstate.stage>=0 )
33187     {
33188         n = state->rstate.ia.ptr.p_int[0];
33189         nwork = state->rstate.ia.ptr.p_int[1];
33190         k = state->rstate.ia.ptr.p_int[2];
33191         cnt = state->rstate.ia.ptr.p_int[3];
33192         i = state->rstate.ia.ptr.p_int[4];
33193         i1 = state->rstate.ia.ptr.p_int[5];
33194         j = state->rstate.ia.ptr.p_int[6];
33195         convcnt = state->rstate.ia.ptr.p_int[7];
33196         vv = state->rstate.ra.ptr.p_double[0];
33197         v = state->rstate.ra.ptr.p_double[1];
33198     }
33199     else
33200     {
33201         n = 359;
33202         nwork = -58;
33203         k = -919;
33204         cnt = -909;
33205         i = 81;
33206         i1 = 255;
33207         j = 74;
33208         convcnt = -788;
33209         vv = 809;
33210         v = 205;
33211     }
33212     if( state->rstate.stage==0 )
33213     {
33214         goto lbl_0;
33215     }
33216 
33217     /*
33218      * Routine body
33219      */
33220     n = state->n;
33221     k = state->k;
33222     nwork = state->nwork;
33223 
33224     /*
33225      * Initialize RNG. Deterministic initialization (with fixed
33226      * seed) is required because we need deterministic behavior
33227      * of the entire solver.
33228      */
33229     hqrndseed(453, 463664, &state->rs, _state);
33230 
33231     /*
33232      * Prepare iteration
33233      * Initialize QNew with random orthogonal matrix (or reuse its previous value).
33234      */
33235     state->repiterationscount = 0;
33236     rmatrixsetlengthatleast(&state->qcur, nwork, n, _state);
33237     rmatrixsetlengthatleast(&state->qnew, nwork, n, _state);
33238     rmatrixsetlengthatleast(&state->znew, nwork, n, _state);
33239     rvectorsetlengthatleast(&state->wcur, nwork, _state);
33240     rvectorsetlengthatleast(&state->wprev, nwork, _state);
33241     rvectorsetlengthatleast(&state->wrank, nwork, _state);
33242     rmatrixsetlengthatleast(&state->x, n, nwork, _state);
33243     rmatrixsetlengthatleast(&state->ax, n, nwork, _state);
33244     rmatrixsetlengthatleast(&state->rq, n, k, _state);
33245     rvectorsetlengthatleast(&state->rw, k, _state);
33246     rmatrixsetlengthatleast(&state->rz, nwork, k, _state);
33247     rmatrixsetlengthatleast(&state->r, nwork, nwork, _state);
33248     for(i=0; i<=nwork-1; i++)
33249     {
33250         state->wprev.ptr.p_double[i] = -1.0;
33251     }
33252     if( !state->usewarmstart||state->firstcall )
33253     {
33254 
33255         /*
33256          * Use Q0 (either no warm start request, or warm start was
33257          * requested by user - but it is first call).
33258          *
33259          */
33260         if( state->firstcall )
33261         {
33262 
33263             /*
33264              * First call, generate Q0
33265              */
33266             for(i=0; i<=nwork-1; i++)
33267             {
33268                 for(j=0; j<=n-1; j++)
33269                 {
33270                     state->znew.ptr.pp_double[i][j] = hqrnduniformr(&state->rs, _state)-0.5;
33271                 }
33272             }
33273             rmatrixlq(&state->znew, nwork, n, &state->tau, _state);
33274             rmatrixlqunpackq(&state->znew, nwork, n, &state->tau, nwork, &state->q0, _state);
33275             state->firstcall = ae_false;
33276         }
33277         rmatrixcopy(nwork, n, &state->q0, 0, 0, &state->qnew, 0, 0, _state);
33278     }
33279 
33280     /*
33281      * Start iteration
33282      */
33283     state->repiterationscount = 0;
33284     convcnt = 0;
33285 lbl_1:
33286     if( !((state->maxits==0||state->repiterationscount<state->maxits)&&convcnt<evd_stepswithintol) )
33287     {
33288         goto lbl_2;
33289     }
33290 
33291     /*
33292      * Update QCur := QNew
33293      *
33294      * Calculate A*Q'
33295      */
33296     rmatrixcopy(nwork, n, &state->qnew, 0, 0, &state->qcur, 0, 0, _state);
33297     rmatrixtranspose(nwork, n, &state->qcur, 0, 0, &state->x, 0, 0, _state);
33298     evd_clearrfields(state, _state);
33299     state->requesttype = 0;
33300     state->requestsize = nwork;
33301     state->rstate.stage = 0;
33302     goto lbl_rcomm;
33303 lbl_0:
33304 
33305     /*
33306      * Perform Rayleigh-Ritz step to estimate convergence of diagonal eigenvalues
33307      */
33308     if( ae_fp_greater(state->eps,(double)(0)) )
33309     {
33310         ae_assert(state->matrixtype==0, "integrity check failed", _state);
33311         rmatrixsetlengthatleast(&state->r, nwork, nwork, _state);
33312         rmatrixgemm(nwork, nwork, n, 1.0, &state->qcur, 0, 0, 0, &state->ax, 0, 0, 0, 0.0, &state->r, 0, 0, _state);
33313         if( !smatrixevd(&state->r, nwork, 0, ae_true, &state->wcur, &state->dummy, _state) )
33314         {
33315             ae_assert(ae_false, "EigSubspace: direct eigensolver failed to converge", _state);
33316         }
33317         for(j=0; j<=nwork-1; j++)
33318         {
33319             state->wrank.ptr.p_double[j] = ae_fabs(state->wcur.ptr.p_double[j], _state);
33320         }
33321         rankxuntied(&state->wrank, nwork, &state->buf, _state);
33322         v = (double)(0);
33323         vv = (double)(0);
33324         for(j=0; j<=nwork-1; j++)
33325         {
33326             if( ae_fp_greater_eq(state->wrank.ptr.p_double[j],(double)(nwork-k)) )
33327             {
33328                 v = ae_maxreal(v, ae_fabs(state->wcur.ptr.p_double[j]-state->wprev.ptr.p_double[j], _state), _state);
33329                 vv = ae_maxreal(vv, ae_fabs(state->wcur.ptr.p_double[j], _state), _state);
33330             }
33331         }
33332         if( ae_fp_eq(vv,(double)(0)) )
33333         {
33334             vv = (double)(1);
33335         }
33336         if( ae_fp_less_eq(v,state->eps*vv) )
33337         {
33338             inc(&convcnt, _state);
33339         }
33340         else
33341         {
33342             convcnt = 0;
33343         }
33344         for(j=0; j<=nwork-1; j++)
33345         {
33346             state->wprev.ptr.p_double[j] = state->wcur.ptr.p_double[j];
33347         }
33348     }
33349 
33350     /*
33351      * QR renormalization and update of QNew
33352      */
33353     rmatrixtranspose(n, nwork, &state->ax, 0, 0, &state->znew, 0, 0, _state);
33354     rmatrixlq(&state->znew, nwork, n, &state->tau, _state);
33355     rmatrixlqunpackq(&state->znew, nwork, n, &state->tau, nwork, &state->qnew, _state);
33356 
33357     /*
33358      * Update iteration index
33359      */
33360     state->repiterationscount = state->repiterationscount+1;
33361     goto lbl_1;
33362 lbl_2:
33363 
33364     /*
33365      * Perform Rayleigh-Ritz step: find true eigenpairs in NWork-dimensional
33366      * subspace.
33367      */
33368     ae_assert(state->matrixtype==0, "integrity check failed", _state);
33369     ae_assert(state->eigenvectorsneeded==1, "Assertion failed", _state);
33370     rmatrixgemm(nwork, nwork, n, 1.0, &state->qcur, 0, 0, 0, &state->ax, 0, 0, 0, 0.0, &state->r, 0, 0, _state);
33371     if( !smatrixevd(&state->r, nwork, 1, ae_true, &state->tw, &state->tz, _state) )
33372     {
33373         ae_assert(ae_false, "EigSubspace: direct eigensolver failed to converge", _state);
33374     }
33375 
33376     /*
33377      * Reorder eigenpairs according to their absolute magnitude, select
33378      * K top ones. This reordering algorithm is very inefficient and has
33379      * O(NWork*K) running time, but it is still faster than other parts
33380      * of the solver, so we may use it.
33381      *
33382      * Then, we transform RZ to RQ (full N-dimensional representation).
33383      * After this part is done, RW and RQ contain solution.
33384      */
33385     for(j=0; j<=nwork-1; j++)
33386     {
33387         state->wrank.ptr.p_double[j] = ae_fabs(state->tw.ptr.p_double[j], _state);
33388     }
33389     rankxuntied(&state->wrank, nwork, &state->buf, _state);
33390     cnt = 0;
33391     for(i=nwork-1; i>=nwork-k; i--)
33392     {
33393         for(i1=0; i1<=nwork-1; i1++)
33394         {
33395             if( ae_fp_eq(state->wrank.ptr.p_double[i1],(double)(i)) )
33396             {
33397                 ae_assert(cnt<k, "EigSubspace: integrity check failed", _state);
33398                 state->rw.ptr.p_double[cnt] = state->tw.ptr.p_double[i1];
33399                 for(j=0; j<=nwork-1; j++)
33400                 {
33401                     state->rz.ptr.pp_double[j][cnt] = state->tz.ptr.pp_double[j][i1];
33402                 }
33403                 cnt = cnt+1;
33404             }
33405         }
33406     }
33407     ae_assert(cnt==k, "EigSubspace: integrity check failed", _state);
33408     rmatrixgemm(n, k, nwork, 1.0, &state->qcur, 0, 0, 1, &state->rz, 0, 0, 0, 0.0, &state->rq, 0, 0, _state);
33409     result = ae_false;
33410     return result;
33411 
33412     /*
33413      * Saving state
33414      */
33415 lbl_rcomm:
33416     result = ae_true;
33417     state->rstate.ia.ptr.p_int[0] = n;
33418     state->rstate.ia.ptr.p_int[1] = nwork;
33419     state->rstate.ia.ptr.p_int[2] = k;
33420     state->rstate.ia.ptr.p_int[3] = cnt;
33421     state->rstate.ia.ptr.p_int[4] = i;
33422     state->rstate.ia.ptr.p_int[5] = i1;
33423     state->rstate.ia.ptr.p_int[6] = j;
33424     state->rstate.ia.ptr.p_int[7] = convcnt;
33425     state->rstate.ra.ptr.p_double[0] = vv;
33426     state->rstate.ra.ptr.p_double[1] = v;
33427     return result;
33428 }
33429 
33430 
33431 /*************************************************************************
33432 Finding the eigenvalues and eigenvectors of a symmetric matrix
33433 
33434 The algorithm finds eigen pairs of a symmetric matrix by reducing it to
33435 tridiagonal form and using the QL/QR algorithm.
33436 
33437   ! COMMERCIAL EDITION OF ALGLIB:
33438   !
33439   ! Commercial Edition of ALGLIB includes following important improvements
33440   ! of this function:
33441   ! * high-performance native backend with same C# interface (C# version)
33442   ! * hardware vendor (Intel) implementations of linear algebra primitives
33443   !   (C++ and C# versions, x86/x64 platform)
33444   !
33445   ! We recommend you to read 'Working with commercial version' section  of
33446   ! ALGLIB Reference Manual in order to find out how to  use  performance-
33447   ! related features provided by commercial edition of ALGLIB.
33448 
33449 Input parameters:
33450     A       -   symmetric matrix which is given by its upper or lower
33451                 triangular part.
33452                 Array whose indexes range within [0..N-1, 0..N-1].
33453     N       -   size of matrix A.
33454     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
33455                 If ZNeeded is equal to:
33456                  * 0, the eigenvectors are not returned;
33457                  * 1, the eigenvectors are returned.
33458     IsUpper -   storage format.
33459 
33460 Output parameters:
33461     D       -   eigenvalues in ascending order.
33462                 Array whose index ranges within [0..N-1].
33463     Z       -   if ZNeeded is equal to:
33464                  * 0, Z hasn't changed;
33465                  * 1, Z contains the eigenvectors.
33466                 Array whose indexes range within [0..N-1, 0..N-1].
33467                 The eigenvectors are stored in the matrix columns.
33468 
33469 Result:
33470     True, if the algorithm has converged.
33471     False, if the algorithm hasn't converged (rare case).
33472 
33473   -- ALGLIB --
33474      Copyright 2005-2008 by Bochkanov Sergey
33475 *************************************************************************/
ae_bool smatrixevd(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_bool result;

    /*
     * Work on a private copy of A: the reduction to tridiagonal form
     * overwrites the matrix, while the public contract leaves the
     * caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(d);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "SMatrixEVD: incorrect ZNeeded", _state);

    /*
     * Reduce A to symmetric tridiagonal form (D = diagonal, E = off-diagonal),
     * unpack the accumulated orthogonal transformation Q into Z when
     * eigenvectors are requested, then solve the tridiagonal problem
     * with the QL/QR algorithm.
     */
    smatrixtd(a, n, isupper, &tau, d, &e, _state);
    if( zneeded==1 )
    {
        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
    }
    result = smatrixtdevd(d, &e, n, zneeded, z, _state);
    ae_frame_leave(_state);
    return result;
}
33511 
33512 
33513 /*************************************************************************
33514 Subroutine for finding the eigenvalues (and eigenvectors) of  a  symmetric
33515 matrix  in  a  given half open interval (A, B] by using  a  bisection  and
33516 inverse iteration
33517 
33518   ! COMMERCIAL EDITION OF ALGLIB:
33519   !
33520   ! Commercial Edition of ALGLIB includes following important improvements
33521   ! of this function:
33522   ! * high-performance native backend with same C# interface (C# version)
33523   ! * hardware vendor (Intel) implementations of linear algebra primitives
33524   !   (C++ and C# versions, x86/x64 platform)
33525   !
33526   ! We recommend you to read 'Working with commercial version' section  of
33527   ! ALGLIB Reference Manual in order to find out how to  use  performance-
33528   ! related features provided by commercial edition of ALGLIB.
33529 
33530 Input parameters:
33531     A       -   symmetric matrix which is given by its upper or lower
33532                 triangular part. Array [0..N-1, 0..N-1].
33533     N       -   size of matrix A.
33534     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
33535                 If ZNeeded is equal to:
33536                  * 0, the eigenvectors are not returned;
33537                  * 1, the eigenvectors are returned.
33538     IsUpperA -  storage format of matrix A.
33539     B1, B2 -    half open interval (B1, B2] to search eigenvalues in.
33540 
33541 Output parameters:
33542     M       -   number of eigenvalues found in a given half-interval (M>=0).
33543     W       -   array of the eigenvalues found.
33544                 Array whose index ranges within [0..M-1].
33545     Z       -   if ZNeeded is equal to:
33546                  * 0, Z hasn't changed;
33547                  * 1, Z contains eigenvectors.
33548                 Array whose indexes range within [0..N-1, 0..M-1].
33549                 The eigenvectors are stored in the matrix columns.
33550 
33551 Result:
33552     True, if successful. M contains the number of eigenvalues in the given
33553     half-interval (could be equal to 0), W contains the eigenvalues,
33554     Z contains the eigenvectors (if needed).
33555 
33556     False, if the bisection method subroutine wasn't able to find the
33557     eigenvalues in the given interval or if the inverse iteration subroutine
33558     wasn't able to find all the corresponding eigenvectors.
33559     In that case, the eigenvalues and eigenvectors are not returned,
33560     M is equal to 0.
33561 
33562   -- ALGLIB --
33563      Copyright 07.01.2006 by Bochkanov Sergey
33564 *************************************************************************/
ae_bool smatrixevdr(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     double b1,
     double b2,
     ae_int_t* m,
     /* Real    */ ae_vector* w,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    *m = 0;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);

    /* message names this routine (was "SMatrixTDEVDR", a different function) */
    ae_assert(zneeded==0||zneeded==1, "SMatrixEVDR: incorrect ZNeeded", _state);

    /*
     * Reduce A to tridiagonal form (W = diagonal, E = off-diagonal),
     * unpack Q into Z when eigenvectors are requested, then search the
     * half-open interval (B1,B2] by bisection + inverse iteration.
     */
    smatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
    }
    result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, z, _state);
    ae_frame_leave(_state);
    return result;
}
33604 
33605 
33606 /*************************************************************************
33607 Subroutine for finding the eigenvalues and  eigenvectors  of  a  symmetric
33608 matrix with given indexes by using bisection and inverse iteration methods.
33609 
33610 Input parameters:
33611     A       -   symmetric matrix which is given by its upper or lower
33612                 triangular part. Array whose indexes range within [0..N-1, 0..N-1].
33613     N       -   size of matrix A.
33614     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
33615                 If ZNeeded is equal to:
33616                  * 0, the eigenvectors are not returned;
33617                  * 1, the eigenvectors are returned.
33618     IsUpperA -  storage format of matrix A.
33619     I1, I2 -    index interval for searching (from I1 to I2).
33620                 0 <= I1 <= I2 <= N-1.
33621 
33622 Output parameters:
33623     W       -   array of the eigenvalues found.
33624                 Array whose index ranges within [0..I2-I1].
33625     Z       -   if ZNeeded is equal to:
33626                  * 0, Z hasn't changed;
33627                  * 1, Z contains eigenvectors.
33628                 Array whose indexes range within [0..N-1, 0..I2-I1].
33629                 In that case, the eigenvectors are stored in the matrix columns.
33630 
33631 Result:
33632     True, if successful. W contains the eigenvalues, Z contains the
33633     eigenvectors (if needed).
33634 
33635     False, if the bisection method subroutine wasn't able to find the
33636     eigenvalues in the given interval or if the inverse iteration subroutine
33637     wasn't able to find all the corresponding eigenvectors.
33638     In that case, the eigenvalues and eigenvectors are not returned.
33639 
33640   -- ALGLIB --
33641      Copyright 07.01.2006 by Bochkanov Sergey
33642 *************************************************************************/
ae_bool smatrixevdi(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     ae_int_t i1,
     ae_int_t i2,
     /* Real    */ ae_vector* w,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "SMatrixEVDI: incorrect ZNeeded", _state);

    /*
     * Reduce A to tridiagonal form (W = diagonal, E = off-diagonal),
     * unpack Q into Z when eigenvectors are requested, then extract
     * eigenpairs with indexes I1..I2 by bisection + inverse iteration.
     */
    smatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        smatrixtdunpackq(a, n, isupper, &tau, z, _state);
    }
    result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, z, _state);
    ae_frame_leave(_state);
    return result;
}
33680 
33681 
33682 /*************************************************************************
33683 Finding the eigenvalues and eigenvectors of a Hermitian matrix
33684 
33685 The algorithm finds eigen pairs of a Hermitian matrix by  reducing  it  to
33686 real tridiagonal form and using the QL/QR algorithm.
33687 
33688   ! COMMERCIAL EDITION OF ALGLIB:
33689   !
33690   ! Commercial Edition of ALGLIB includes following important improvements
33691   ! of this function:
33692   ! * high-performance native backend with same C# interface (C# version)
33693   ! * hardware vendor (Intel) implementations of linear algebra primitives
33694   !   (C++ and C# versions, x86/x64 platform)
33695   !
33696   ! We recommend you to read 'Working with commercial version' section  of
33697   ! ALGLIB Reference Manual in order to find out how to  use  performance-
33698   ! related features provided by commercial edition of ALGLIB.
33699 
33700 Input parameters:
33701     A       -   Hermitian matrix which is given  by  its  upper  or  lower
33702                 triangular part.
33703                 Array whose indexes range within [0..N-1, 0..N-1].
33704     N       -   size of matrix A.
33705     IsUpper -   storage format.
33706     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
33707                 not. If ZNeeded is equal to:
33708                  * 0, the eigenvectors are not returned;
33709                  * 1, the eigenvectors are returned.
33710 
33711 Output parameters:
33712     D       -   eigenvalues in ascending order.
33713                 Array whose index ranges within [0..N-1].
33714     Z       -   if ZNeeded is equal to:
33715                  * 0, Z hasn't changed;
33716                  * 1, Z contains the eigenvectors.
33717                 Array whose indexes range within [0..N-1, 0..N-1].
33718                 The eigenvectors are stored in the matrix columns.
33719 
33720 Result:
33721     True, if the algorithm has converged.
33722     False, if the algorithm hasn't converged (rare case).
33723 
33724 Note:
33725     eigenvectors of Hermitian matrix are defined up to  multiplication  by
33726     a complex number L, such that |L|=1.
33727 
33728   -- ALGLIB --
33729      Copyright 2005, 23 March 2007 by Bochkanov Sergey
33730 *************************************************************************/
ae_bool hmatrixevd(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     /* Real    */ ae_vector* d,
     /* Complex */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tau;
    ae_vector e;
    ae_matrix t;
    ae_matrix qz;
    ae_matrix q;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&t, 0, sizeof(t));
    memset(&qz, 0, sizeof(qz));
    memset(&q, 0, sizeof(q));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(d);
    ae_matrix_clear(z);
    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&qz, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "HermitianEVD: incorrect ZNeeded", _state);

    /*
     * Reduce to real tridiagonal form; when eigenvectors are requested,
     * unpack the unitary transformation Q and switch to ZNeeded=2 so
     * the tridiagonal solver returns its (real) eigenvector matrix in T.
     */
    hmatrixtd(a, n, isupper, &tau, d, &e, _state);
    if( zneeded==1 )
    {
        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
        zneeded = 2;
    }

    /*
     * TDEVD
     */
    result = smatrixtdevd(d, &e, n, zneeded, &t, _state);

    /*
     * Eigenvectors are needed
     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
     *
     * QZ is an N x 2N scratch matrix: the left N columns hold one part
     * (real or imaginary) of Q, the right N columns receive the product,
     * so the in-place gemm reads and writes disjoint column ranges.
     */
    if( result&&zneeded!=0 )
    {
        ae_matrix_set_length(z, n, n, _state);
        ae_matrix_set_length(&qz, n, 2*n, _state);

        /*
         * Calculate Re(Q)*T
         */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                qz.ptr.pp_double[i][j] = q.ptr.pp_complex[i][j].x;
            }
        }
        rmatrixgemm(n, n, n, 1.0, &qz, 0, 0, 0, &t, 0, 0, 0, 0.0, &qz, 0, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_complex[i][j].x = qz.ptr.pp_double[i][n+j];
            }
        }

        /*
         * Calculate Im(Q)*T
         */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                qz.ptr.pp_double[i][j] = q.ptr.pp_complex[i][j].y;
            }
        }
        rmatrixgemm(n, n, n, 1.0, &qz, 0, 0, 0, &t, 0, 0, 0, 0.0, &qz, 0, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_complex[i][j].y = qz.ptr.pp_double[i][n+j];
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
33834 
33835 
33836 /*************************************************************************
33837 Subroutine for finding the eigenvalues (and eigenvectors) of  a  Hermitian
33838 matrix  in  a  given half-interval (A, B] by using a bisection and inverse
33839 iteration
33840 
33841 Input parameters:
33842     A       -   Hermitian matrix which is given  by  its  upper  or  lower
33843                 triangular  part.  Array  whose   indexes   range   within
33844                 [0..N-1, 0..N-1].
33845     N       -   size of matrix A.
33846     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
33847                 not. If ZNeeded is equal to:
33848                  * 0, the eigenvectors are not returned;
33849                  * 1, the eigenvectors are returned.
33850     IsUpperA -  storage format of matrix A.
33851     B1, B2 -    half-interval (B1, B2] to search eigenvalues in.
33852 
33853 Output parameters:
33854     M       -   number of eigenvalues found in a given half-interval, M>=0
33855     W       -   array of the eigenvalues found.
33856                 Array whose index ranges within [0..M-1].
33857     Z       -   if ZNeeded is equal to:
33858                  * 0, Z hasn't changed;
33859                  * 1, Z contains eigenvectors.
33860                 Array whose indexes range within [0..N-1, 0..M-1].
33861                 The eigenvectors are stored in the matrix columns.
33862 
33863 Result:
33864     True, if successful. M contains the number of eigenvalues in the given
33865     half-interval (could be equal to 0), W contains the eigenvalues,
33866     Z contains the eigenvectors (if needed).
33867 
33868     False, if the bisection method subroutine  wasn't  able  to  find  the
33869     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
33870     subroutine  wasn't  able  to  find all the corresponding eigenvectors.
33871     In that case, the eigenvalues and eigenvectors are not returned, M  is
33872     equal to 0.
33873 
33874 Note:
    eigenvectors of a Hermitian matrix are defined up to multiplication  by
    a complex number L, such that |L|=1.
33877 
33878   -- ALGLIB --
33879      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
33880 *************************************************************************/
ae_bool hmatrixevdr(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     double b1,
     double b2,
     ae_int_t* m,
     /* Real    */ ae_vector* w,
     /* Complex */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_matrix q;
    ae_matrix t;
    ae_vector tau;
    ae_vector e;
    ae_vector work;
    ae_int_t i;
    ae_int_t k;
    double v;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&q, 0, sizeof(q));
    memset(&t, 0, sizeof(t));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&work, 0, sizeof(work));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    *m = 0;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsInInterval: incorrect ZNeeded", _state);

    /*
     * Reduce to real tridiagonal form; when eigenvectors are requested,
     * unpack the unitary transformation Q and switch to ZNeeded=2 so
     * the tridiagonal solver returns its (real) eigenvector matrix in T.
     */
    hmatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
        zneeded = 2;
    }

    /*
     * Bisection and inverse iteration on the tridiagonal problem,
     * restricted to the half-open interval (B1,B2]; M receives the
     * number of eigenvalues found.
     */
    result = smatrixtdevdr(w, &e, n, zneeded, b1, b2, m, &t, _state);

    /*
     * Eigenvectors are needed
     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
     * (row-by-row accumulation into WORK, real and imaginary parts
     * handled separately because T is real)
     */
    if( (result&&zneeded!=0)&&*m!=0 )
    {
        ae_vector_set_length(&work, *m-1+1, _state);
        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Calculate real part
             */
            for(k=0; k<=*m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].x;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v);
            }
            for(k=0; k<=*m-1; k++)
            {
                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
            }

            /*
             * Calculate imaginary part
             */
            for(k=0; k<=*m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].y;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,*m-1), v);
            }
            for(k=0; k<=*m-1; k++)
            {
                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
33988 
33989 
33990 /*************************************************************************
33991 Subroutine for finding the eigenvalues and  eigenvectors  of  a  Hermitian
33992 matrix with given indexes by using bisection and inverse iteration methods
33993 
33994 Input parameters:
33995     A       -   Hermitian matrix which is given  by  its  upper  or  lower
33996                 triangular part.
33997                 Array whose indexes range within [0..N-1, 0..N-1].
33998     N       -   size of matrix A.
33999     ZNeeded -   flag controlling whether the eigenvectors  are  needed  or
34000                 not. If ZNeeded is equal to:
34001                  * 0, the eigenvectors are not returned;
34002                  * 1, the eigenvectors are returned.
34003     IsUpperA -  storage format of matrix A.
34004     I1, I2 -    index interval for searching (from I1 to I2).
34005                 0 <= I1 <= I2 <= N-1.
34006 
34007 Output parameters:
34008     W       -   array of the eigenvalues found.
34009                 Array whose index ranges within [0..I2-I1].
34010     Z       -   if ZNeeded is equal to:
34011                  * 0, Z hasn't changed;
34012                  * 1, Z contains eigenvectors.
34013                 Array whose indexes range within [0..N-1, 0..I2-I1].
34014                 In  that  case,  the eigenvectors are stored in the matrix
34015                 columns.
34016 
34017 Result:
34018     True, if successful. W contains the eigenvalues, Z contains the
34019     eigenvectors (if needed).
34020 
34021     False, if the bisection method subroutine  wasn't  able  to  find  the
34022     eigenvalues  in  the  given  interval  or  if  the  inverse  iteration
34023     subroutine wasn't able to find  all  the  corresponding  eigenvectors.
34024     In that case, the eigenvalues and eigenvectors are not returned.
34025 
34026 Note:
    eigenvectors of a Hermitian matrix are defined up to multiplication  by
    a complex number L, such that |L|=1.
34029 
34030   -- ALGLIB --
34031      Copyright 07.01.2006, 24.03.2007 by Bochkanov Sergey.
34032 *************************************************************************/
ae_bool hmatrixevdi(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_int_t zneeded,
     ae_bool isupper,
     ae_int_t i1,
     ae_int_t i2,
     /* Real    */ ae_vector* w,
     /* Complex */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_matrix q;
    ae_matrix t;
    ae_vector tau;
    ae_vector e;
    ae_vector work;
    ae_int_t i;
    ae_int_t k;
    double v;
    ae_int_t m;
    ae_bool result;

    /*
     * Work on a private copy of A: the tridiagonal reduction overwrites
     * the matrix, while the public contract leaves the caller's A intact.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&q, 0, sizeof(q));
    memset(&t, 0, sizeof(t));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&work, 0, sizeof(work));
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(w);
    ae_matrix_clear(z);
    ae_matrix_init(&q, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tau, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded==0||zneeded==1, "HermitianEigenValuesAndVectorsByIndexes: incorrect ZNeeded", _state);

    /*
     * Reduce to real tridiagonal form; when eigenvectors are requested,
     * unpack the unitary transformation Q and switch to ZNeeded=2 so
     * the tridiagonal solver returns its (real) eigenvector matrix in T.
     */
    hmatrixtd(a, n, isupper, &tau, w, &e, _state);
    if( zneeded==1 )
    {
        hmatrixtdunpackq(a, n, isupper, &tau, &q, _state);
        zneeded = 2;
    }

    /*
     * Bisection and inverse iteration on the tridiagonal problem,
     * restricted to eigenvalue indexes I1..I2.
     */
    result = smatrixtdevdi(w, &e, n, zneeded, i1, i2, &t, _state);

    /*
     * Eigenvectors are needed
     * Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T
     * (M = I2-I1+1 eigenpairs were requested; real and imaginary parts
     * are accumulated separately because T is real)
     */
    m = i2-i1+1;
    if( result&&zneeded!=0 )
    {
        ae_vector_set_length(&work, m-1+1, _state);
        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
        for(i=0; i<=n-1; i++)
        {

            /*
             * Calculate real part
             */
            for(k=0; k<=m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].x;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v);
            }
            for(k=0; k<=m-1; k++)
            {
                z->ptr.pp_complex[i][k].x = work.ptr.p_double[k];
            }

            /*
             * Calculate imaginary part
             */
            for(k=0; k<=m-1; k++)
            {
                work.ptr.p_double[k] = (double)(0);
            }
            for(k=0; k<=n-1; k++)
            {
                v = q.ptr.pp_complex[i][k].y;
                ae_v_addd(&work.ptr.p_double[0], 1, &t.ptr.pp_double[k][0], 1, ae_v_len(0,m-1), v);
            }
            for(k=0; k<=m-1; k++)
            {
                z->ptr.pp_complex[i][k].y = work.ptr.p_double[k];
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
34140 
34141 
34142 /*************************************************************************
34143 Finding the eigenvalues and eigenvectors of a tridiagonal symmetric matrix
34144 
The algorithm finds the eigen pairs of a tridiagonal symmetric matrix by
using a QL/QR algorithm with implicit shifts.
34147 
34148   ! COMMERCIAL EDITION OF ALGLIB:
34149   !
34150   ! Commercial Edition of ALGLIB includes following important improvements
34151   ! of this function:
34152   ! * high-performance native backend with same C# interface (C# version)
34153   ! * hardware vendor (Intel) implementations of linear algebra primitives
34154   !   (C++ and C# versions, x86/x64 platform)
34155   !
34156   ! We recommend you to read 'Working with commercial version' section  of
34157   ! ALGLIB Reference Manual in order to find out how to  use  performance-
34158   ! related features provided by commercial edition of ALGLIB.
34159 
34160 Input parameters:
34161     D       -   the main diagonal of a tridiagonal matrix.
34162                 Array whose index ranges within [0..N-1].
34163     E       -   the secondary diagonal of a tridiagonal matrix.
34164                 Array whose index ranges within [0..N-2].
34165     N       -   size of matrix A.
34166     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
34167                 If ZNeeded is equal to:
34168                  * 0, the eigenvectors are not needed;
34169                  * 1, the eigenvectors of a tridiagonal matrix
34170                    are multiplied by the square matrix Z. It is used if the
34171                    tridiagonal matrix is obtained by the similarity
34172                    transformation of a symmetric matrix;
34173                  * 2, the eigenvectors of a tridiagonal matrix replace the
34174                    square matrix Z;
34175                  * 3, matrix Z contains the first row of the eigenvectors
34176                    matrix.
34177     Z       -   if ZNeeded=1, Z contains the square matrix by which the
34178                 eigenvectors are multiplied.
34179                 Array whose indexes range within [0..N-1, 0..N-1].
34180 
34181 Output parameters:
34182     D       -   eigenvalues in ascending order.
34183                 Array whose index ranges within [0..N-1].
34184     Z       -   if ZNeeded is equal to:
34185                  * 0, Z hasn't changed;
34186                  * 1, Z contains the product of a given matrix (from the left)
34187                    and the eigenvectors matrix (from the right);
34188                  * 2, Z contains the eigenvectors.
34189                  * 3, Z contains the first row of the eigenvectors matrix.
34190                 If ZNeeded<3, Z is the array whose indexes range within [0..N-1, 0..N-1].
34191                 In that case, the eigenvectors are stored in the matrix columns.
34192                 If ZNeeded=3, Z is the array whose indexes range within [0..0, 0..N-1].
34193 
34194 Result:
34195     True, if the algorithm has converged.
34196     False, if the algorithm hasn't converged.
34197 
34198   -- LAPACK routine (version 3.0) --
34199      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
34200      Courant Institute, Argonne National Lab, and Rice University
34201      September 30, 1994
34202 *************************************************************************/
/* Computes eigenvalues (and optionally eigenvectors) of a symmetric
 * tridiagonal matrix via an implicit-shift QL/QR sweep, dispatching to an
 * MKL backend first when one is compiled in.
 *
 * d        - main diagonal [0..N-1]; replaced by ascending eigenvalues.
 * e        - secondary diagonal [0..N-2]; caller's copy is preserved (the
 *            routine works on a private copy).
 * n        - matrix size, N>=1.
 * zneeded  - 0: no vectors; 1: accumulate into caller-supplied Z;
 *            2: return eigenvectors in Z; 3: return first row of the
 *            eigenvector matrix in Z.
 * z        - in/out eigenvector storage, meaning depends on zneeded.
 *
 * Returns ae_true on convergence, ae_false otherwise.
 */
ae_bool smatrixtdevd(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_int_t zneeded,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _e;
    ae_vector d1;
    ae_vector e1;
    ae_vector ex;
    ae_matrix z1;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_e, 0, sizeof(_e));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    memset(&ex, 0, sizeof(ex));
    memset(&z1, 0, sizeof(z1));
    /* Work on a private copy of E so the caller's array is left intact. */
    ae_vector_init_copy(&_e, e, _state, ae_true);
    e = &_e;
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z1, 0, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "SMatrixTDEVD: N<=0", _state);
    ae_assert(zneeded>=0&&zneeded<=3, "SMatrixTDEVD: incorrect ZNeeded", _state);
    result = ae_false;

    /*
     * Preprocess Z: make ZNeeded equal to 0, 1 or 3.
     * Ensure that memory for Z is allocated.
     */
    if( zneeded==2 )
    {

        /*
         * Load identity to Z
         */
        /* ZNeeded=2 is reduced to ZNeeded=1 by seeding Z with the identity,
         * so accumulation produces the raw eigenvector matrix. */
        rmatrixsetlengthatleast(z, n, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                z->ptr.pp_double[i][j] = 0.0;
            }
            z->ptr.pp_double[i][i] = 1.0;
        }
        zneeded = 1;
    }
    if( zneeded==3 )
    {

        /*
         * Allocate memory
         */
        /* Only the first row of the eigenvector matrix is requested. */
        rmatrixsetlengthatleast(z, 1, n, _state);
    }

    /*
     * Try to solve problem with MKL
     */
    /* EX is a disposable copy of E (the backend may overwrite its input).
     * Only entries [0..n-2] are meaningful; ex[n-1] is presumably never
     * read by the backend -- TODO confirm against smatrixtdevdmkl. */
    ae_vector_set_length(&ex, n, _state);
    for(i=0; i<=n-2; i++)
    {
        ex.ptr.p_double[i] = e->ptr.p_double[i];
    }
    if( smatrixtdevdmkl(d, &ex, n, zneeded, z, &result, _state) )
    {
        /* MKL handled the problem (success or failure); return its verdict. */
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Prepare 1-based task
     */
    /* The generic fallback evd_tridiagonalevd() uses 1-based indexing,
     * so shift D, E and (optionally) Z into 1-based scratch arrays. */
    ae_vector_set_length(&d1, n+1, _state);
    ae_vector_set_length(&e1, n+1, _state);
    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
    if( n>1 )
    {
        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
    }
    if( zneeded==1 )
    {
        ae_matrix_set_length(&z1, n+1, n+1, _state);
        for(i=1; i<=n; i++)
        {
            ae_v_move(&z1.ptr.pp_double[i][1], 1, &z->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
        }
    }

    /*
     * Solve 1-based task
     */
    result = evd_tridiagonalevd(&d1, &e1, n, zneeded, &z1, _state);
    if( !result )
    {
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Convert back to 0-based result
     */
    ae_v_move(&d->ptr.p_double[0], 1, &d1.ptr.p_double[1], 1, ae_v_len(0,n-1));
    if( zneeded!=0 )
    {
        if( zneeded==1 )
        {
            /* Accumulated product: Z already has the right size, copy rows back. */
            for(i=1; i<=n; i++)
            {
                ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1));
            }
            ae_frame_leave(_state);
            return result;
        }
        if( zneeded==2 )
        {
            /* NOTE: unreachable after the preprocessing above (2 was mapped
             * to 1); kept for symmetry with the other branches. */
            ae_matrix_set_length(z, n-1+1, n-1+1, _state);
            for(i=1; i<=n; i++)
            {
                ae_v_move(&z->ptr.pp_double[i-1][0], 1, &z1.ptr.pp_double[i][1], 1, ae_v_len(0,n-1));
            }
            ae_frame_leave(_state);
            return result;
        }
        if( zneeded==3 )
        {
            /* Return only the first row of the eigenvector matrix. */
            ae_matrix_set_length(z, 0+1, n-1+1, _state);
            ae_v_move(&z->ptr.pp_double[0][0], 1, &z1.ptr.pp_double[1][1], 1, ae_v_len(0,n-1));
            ae_frame_leave(_state);
            return result;
        }
        ae_assert(ae_false, "SMatrixTDEVD: Incorrect ZNeeded!", _state);
    }
    ae_frame_leave(_state);
    return result;
}
34347 
34348 
34349 /*************************************************************************
34350 Subroutine for finding the tridiagonal matrix eigenvalues/vectors in a
34351 given half-interval (A, B] by using bisection and inverse iteration.
34352 
34353 Input parameters:
34354     D       -   the main diagonal of a tridiagonal matrix.
34355                 Array whose index ranges within [0..N-1].
34356     E       -   the secondary diagonal of a tridiagonal matrix.
34357                 Array whose index ranges within [0..N-2].
34358     N       -   size of matrix, N>=0.
34359     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
34360                 If ZNeeded is equal to:
34361                  * 0, the eigenvectors are not needed;
34362                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
34363                    by the square matrix Z. It is used if the tridiagonal
34364                    matrix is obtained by the similarity transformation
34365                    of a symmetric matrix.
34366                  * 2, the eigenvectors of a tridiagonal matrix replace matrix Z.
34367     A, B    -   half-interval (A, B] to search eigenvalues in.
34368     Z       -   if ZNeeded is equal to:
34369                  * 0, Z isn't used and remains unchanged;
34370                  * 1, Z contains the square matrix (array whose indexes range
34371                    within [0..N-1, 0..N-1]) which reduces the given symmetric
34372                    matrix to tridiagonal form;
34373                  * 2, Z isn't used (but changed on the exit).
34374 
34375 Output parameters:
34376     D       -   array of the eigenvalues found.
34377                 Array whose index ranges within [0..M-1].
34378     M       -   number of eigenvalues found in the given half-interval (M>=0).
34379     Z       -   if ZNeeded is equal to:
34380                  * 0, doesn't contain any information;
34381                  * 1, contains the product of a given NxN matrix Z (from the
34382                    left) and NxM matrix of the eigenvectors found (from the
34383                    right). Array whose indexes range within [0..N-1, 0..M-1].
34384                  * 2, contains the matrix of the eigenvectors found.
34385                    Array whose indexes range within [0..N-1, 0..M-1].
34386 
34387 Result:
34388 
34389     True, if successful. In that case, M contains the number of eigenvalues
34390     in the given half-interval (could be equal to 0), D contains the eigenvalues,
34391     Z contains the eigenvectors (if needed).
34392     It should be noted that the subroutine changes the size of arrays D and Z.
34393 
34394     False, if the bisection method subroutine wasn't able to find the
34395     eigenvalues in the given interval or if the inverse iteration subroutine
34396     wasn't able to find all the corresponding eigenvectors. In that case,
34397     the eigenvalues and eigenvectors are not returned, M is equal to 0.
34398 
34399   -- ALGLIB --
34400      Copyright 31.03.2008 by Bochkanov Sergey
34401 *************************************************************************/
/* Computes tridiagonal eigenvalues in the half-interval (A,B] by bisection,
 * and (optionally) the matching eigenvectors by inverse iteration.
 *
 * d        - main diagonal [0..N-1]; on success replaced by the M
 *            eigenvalues found, sorted ascending.
 * e        - secondary diagonal [0..N-2]; not modified.
 * n        - matrix size.
 * zneeded  - 0: values only; 1: multiply caller's NxN Z by the eigenvector
 *            matrix; 2: store the NxM eigenvector matrix in Z.
 * a, b     - search interval (A, B], A < B required for a non-empty result.
 * m        - out: number of eigenvalues found (0 on failure).
 * z        - in/out eigenvector storage, meaning depends on zneeded.
 *
 * Returns ae_true on success (possibly with M=0), ae_false if bisection or
 * inverse iteration failed; in that case M is set to 0.
 */
ae_bool smatrixtdevdr(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_int_t zneeded,
     double a,
     double b,
     ae_int_t* m,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t errorcode;
    ae_int_t nsplit;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t cr;
    ae_vector iblock;
    ae_vector isplit;
    ae_vector ifail;
    ae_vector d1;
    ae_vector e1;
    ae_vector w;
    ae_matrix z2;
    ae_matrix z3;
    double v;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&iblock, 0, sizeof(iblock));
    memset(&isplit, 0, sizeof(isplit));
    memset(&ifail, 0, sizeof(ifail));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    memset(&w, 0, sizeof(w));
    memset(&z2, 0, sizeof(z2));
    memset(&z3, 0, sizeof(z3));
    *m = 0;
    ae_vector_init(&iblock, 0, DT_INT, _state, ae_true);
    ae_vector_init(&isplit, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ifail, 0, DT_INT, _state, ae_true);
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true);

    ae_assert(zneeded>=0&&zneeded<=2, "SMatrixTDEVDR: incorrect ZNeeded!", _state);

    /*
     * Special cases
     */
    /* Empty interval or empty matrix: vacuous success with M=0. */
    if( ae_fp_less_eq(b,a) )
    {
        *m = 0;
        result = ae_true;
        ae_frame_leave(_state);
        return result;
    }
    if( n<=0 )
    {
        *m = 0;
        result = ae_true;
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Copy D,E to D1, E1
     */
    /* Internal bisection/inverse-iteration routines use 1-based arrays. */
    ae_vector_set_length(&d1, n+1, _state);
    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
    if( n>1 )
    {
        ae_vector_set_length(&e1, n-1+1, _state);
        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
    }

    /*
     * No eigen vectors
     */
    if( zneeded==0 )
    {
        /* IRANGE=2 (interval search), ORDER=1: eigenvalues only. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 1, a, b, 0, 0, (double)(-1), &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result||*m==0 )
        {
            *m = 0;
            ae_frame_leave(_state);
            return result;
        }
        ae_vector_set_length(d, *m-1+1, _state);
        ae_v_move(&d->ptr.p_double[0], 1, &w.ptr.p_double[1], 1, ae_v_len(0,*m-1));
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are multiplied by Z
     */
    if( zneeded==1 )
    {

        /*
         * Find eigen pairs
         */
        /* ORDER=2 groups eigenvalues by block, as required by DSTEIN-style
         * inverse iteration below. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, (double)(-1), &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result||*m==0 )
        {
            *m = 0;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            /* Inverse iteration failed to converge for CR eigenvectors. */
            *m = 0;
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Selection sort into ascending order, permuting Z2's columns in
         * step with W. M is typically small, so O(M^2) is acceptable. */
        for(i=1; i<=*m; i++)
        {
            k = i;
            for(j=i; j<=*m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Transform Z2 and overwrite Z
         */
        /* Z3 := Z2^T (eigenvectors as rows), then Z2 := Z*Z3^T row by row,
         * i.e. the caller's transformation matrix times the eigenvectors. */
        ae_matrix_set_length(&z3, *m+1, n+1, _state);
        for(i=1; i<=*m; i++)
        {
            ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n));
        }
        for(i=1; i<=n; i++)
        {
            for(j=1; j<=*m; j++)
            {
                v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1));
                z2.ptr.pp_double[i][j] = v;
            }
        }
        /* Resize Z to NxM and copy the product back (0-based). */
        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are stored in Z
     */
    if( zneeded==2 )
    {

        /*
         * Find eigen pairs
         */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 2, 2, a, b, 0, 0, (double)(-1), &w, m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result||*m==0 )
        {
            *m = 0;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, *m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            *m = 0;
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Same selection sort as in the ZNeeded=1 branch. */
        for(i=1; i<=*m; i++)
        {
            k = i;
            for(j=i; j<=*m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        /* Copy raw eigenvectors into Z (NxM, 0-based). */
        ae_matrix_set_length(z, n-1+1, *m-1+1, _state);
        for(i=1; i<=*m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }
        ae_frame_leave(_state);
        return result;
    }
    /* Not reachable: ZNeeded was validated to be in [0,2]. */
    result = ae_false;
    ae_frame_leave(_state);
    return result;
}
34650 
34651 
34652 /*************************************************************************
Subroutine for finding tridiagonal matrix eigenvalues/vectors with given
indexes (in ascending order) by using the bisection and inverse iteration.
34655 
34656 Input parameters:
34657     D       -   the main diagonal of a tridiagonal matrix.
34658                 Array whose index ranges within [0..N-1].
34659     E       -   the secondary diagonal of a tridiagonal matrix.
34660                 Array whose index ranges within [0..N-2].
34661     N       -   size of matrix. N>=0.
34662     ZNeeded -   flag controlling whether the eigenvectors are needed or not.
34663                 If ZNeeded is equal to:
34664                  * 0, the eigenvectors are not needed;
34665                  * 1, the eigenvectors of a tridiagonal matrix are multiplied
34666                    by the square matrix Z. It is used if the
34667                    tridiagonal matrix is obtained by the similarity transformation
34668                    of a symmetric matrix.
34669                  * 2, the eigenvectors of a tridiagonal matrix replace
34670                    matrix Z.
34671     I1, I2  -   index interval for searching (from I1 to I2).
34672                 0 <= I1 <= I2 <= N-1.
34673     Z       -   if ZNeeded is equal to:
34674                  * 0, Z isn't used and remains unchanged;
34675                  * 1, Z contains the square matrix (array whose indexes range within [0..N-1, 0..N-1])
34676                    which reduces the given symmetric matrix to  tridiagonal form;
34677                  * 2, Z isn't used (but changed on the exit).
34678 
34679 Output parameters:
34680     D       -   array of the eigenvalues found.
34681                 Array whose index ranges within [0..I2-I1].
34682     Z       -   if ZNeeded is equal to:
34683                  * 0, doesn't contain any information;
34684                  * 1, contains the product of a given NxN matrix Z (from the left) and
34685                    Nx(I2-I1) matrix of the eigenvectors found (from the right).
34686                    Array whose indexes range within [0..N-1, 0..I2-I1].
                 * 2, contains the matrix of the eigenvectors found.
                   Array whose indexes range within [0..N-1, 0..I2-I1].
34689 
34690 
34691 Result:
34692 
34693     True, if successful. In that case, D contains the eigenvalues,
34694     Z contains the eigenvectors (if needed).
34695     It should be noted that the subroutine changes the size of arrays D and Z.
34696 
34697     False, if the bisection method subroutine wasn't able to find the eigenvalues
34698     in the given interval or if the inverse iteration subroutine wasn't able
34699     to find all the corresponding eigenvectors. In that case, the eigenvalues
34700     and eigenvectors are not returned.
34701 
34702   -- ALGLIB --
34703      Copyright 25.12.2005 by Bochkanov Sergey
34704 *************************************************************************/
/* Computes tridiagonal eigenvalues with indexes I1..I2 (ascending order) by
 * bisection, and (optionally) the matching eigenvectors by inverse iteration.
 *
 * d        - main diagonal [0..N-1]; on success replaced by the I2-I1+1
 *            selected eigenvalues, sorted ascending.
 * e        - secondary diagonal [0..N-2]; not modified.
 * n        - matrix size.
 * zneeded  - 0: values only; 1: multiply caller's NxN Z by the eigenvector
 *            matrix; 2: store the Nx(I2-I1+1) eigenvector matrix in Z.
 * i1, i2   - 0-based index range, 0 <= I1 <= I2 < N.
 * z        - in/out eigenvector storage, meaning depends on zneeded.
 *
 * Returns ae_true on success; ae_false if bisection found a wrong number of
 * eigenvalues or inverse iteration failed to converge.
 */
ae_bool smatrixtdevdi(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_int_t zneeded,
     ae_int_t i1,
     ae_int_t i2,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t errorcode;
    ae_int_t nsplit;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t m;
    ae_int_t cr;
    ae_vector iblock;
    ae_vector isplit;
    ae_vector ifail;
    ae_vector w;
    ae_vector d1;
    ae_vector e1;
    ae_matrix z2;
    ae_matrix z3;
    double v;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&iblock, 0, sizeof(iblock));
    memset(&isplit, 0, sizeof(isplit));
    memset(&ifail, 0, sizeof(ifail));
    memset(&w, 0, sizeof(w));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    memset(&z2, 0, sizeof(z2));
    memset(&z3, 0, sizeof(z3));
    ae_vector_init(&iblock, 0, DT_INT, _state, ae_true);
    ae_vector_init(&isplit, 0, DT_INT, _state, ae_true);
    ae_vector_init(&ifail, 0, DT_INT, _state, ae_true);
    ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&z3, 0, 0, DT_REAL, _state, ae_true);

    ae_assert((0<=i1&&i1<=i2)&&i2<n, "SMatrixTDEVDI: incorrect I1/I2!", _state);

    /*
     * Copy D,E to D1, E1
     */
    /* Internal bisection/inverse-iteration routines use 1-based arrays. */
    ae_vector_set_length(&d1, n+1, _state);
    ae_v_move(&d1.ptr.p_double[1], 1, &d->ptr.p_double[0], 1, ae_v_len(1,n));
    if( n>1 )
    {
        ae_vector_set_length(&e1, n-1+1, _state);
        ae_v_move(&e1.ptr.p_double[1], 1, &e->ptr.p_double[0], 1, ae_v_len(1,n-1));
    }

    /*
     * No eigen vectors
     */
    if( zneeded==0 )
    {
        /* IRANGE=3 (index search, 1-based I1+1..I2+1), ORDER=1: values only. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 1, (double)(0), (double)(0), i1+1, i2+1, (double)(-1), &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result )
        {
            ae_frame_leave(_state);
            return result;
        }
        /* Index search must return exactly I2-I1+1 eigenvalues. */
        if( m!=i2-i1+1 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }
        ae_vector_set_length(d, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are multiplied by Z
     */
    if( zneeded==1 )
    {

        /*
         * Find eigen pairs
         */
        /* ORDER=2 groups eigenvalues by block, as required by DSTEIN-style
         * inverse iteration below. */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, (double)(0), (double)(0), i1+1, i2+1, (double)(-1), &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result )
        {
            ae_frame_leave(_state);
            return result;
        }
        if( m!=i2-i1+1 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            /* Inverse iteration failed to converge for CR eigenvectors. */
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Selection sort into ascending order, permuting Z2's columns in
         * step with W. M is typically small, so O(M^2) is acceptable. */
        for(i=1; i<=m; i++)
        {
            k = i;
            for(j=i; j<=m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Transform Z2 and overwrite Z
         */
        /* Z3 := Z2^T (eigenvectors as rows), then Z2 := Z*Z3^T row by row,
         * i.e. the caller's transformation matrix times the eigenvectors. */
        ae_matrix_set_length(&z3, m+1, n+1, _state);
        for(i=1; i<=m; i++)
        {
            ae_v_move(&z3.ptr.pp_double[i][1], 1, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(1,n));
        }
        for(i=1; i<=n; i++)
        {
            for(j=1; j<=m; j++)
            {
                v = ae_v_dotproduct(&z->ptr.pp_double[i-1][0], 1, &z3.ptr.pp_double[j][1], 1, ae_v_len(0,n-1));
                z2.ptr.pp_double[i][j] = v;
            }
        }
        /* Resize Z to NxM and copy the product back (0-based). */
        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Eigen vectors are stored in Z
     */
    if( zneeded==2 )
    {

        /*
         * Find eigen pairs
         */
        result = evd_internalbisectioneigenvalues(&d1, &e1, n, 3, 2, (double)(0), (double)(0), i1+1, i2+1, (double)(-1), &w, &m, &nsplit, &iblock, &isplit, &errorcode, _state);
        if( !result )
        {
            ae_frame_leave(_state);
            return result;
        }
        if( m!=i2-i1+1 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }
        evd_internaldstein(n, &d1, &e1, m, &w, &iblock, &isplit, &z2, &ifail, &cr, _state);
        if( cr!=0 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Sort eigen values and vectors
         */
        /* Same selection sort as in the ZNeeded=1 branch. */
        for(i=1; i<=m; i++)
        {
            k = i;
            for(j=i; j<=m; j++)
            {
                if( ae_fp_less(w.ptr.p_double[j],w.ptr.p_double[k]) )
                {
                    k = j;
                }
            }
            v = w.ptr.p_double[i];
            w.ptr.p_double[i] = w.ptr.p_double[k];
            w.ptr.p_double[k] = v;
            for(j=1; j<=n; j++)
            {
                v = z2.ptr.pp_double[j][i];
                z2.ptr.pp_double[j][i] = z2.ptr.pp_double[j][k];
                z2.ptr.pp_double[j][k] = v;
            }
        }

        /*
         * Store Z
         */
        /* Copy raw eigenvectors into Z (NxM, 0-based). */
        ae_matrix_set_length(z, n-1+1, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            ae_v_move(&z->ptr.pp_double[0][i-1], z->stride, &z2.ptr.pp_double[1][i], z2.stride, ae_v_len(0,n-1));
        }

        /*
         * Store W
         */
        ae_vector_set_length(d, m-1+1, _state);
        for(i=1; i<=m; i++)
        {
            d->ptr.p_double[i-1] = w.ptr.p_double[i];
        }
        ae_frame_leave(_state);
        return result;
    }
    /* Not reachable for valid ZNeeded in [0,2]. */
    result = ae_false;
    ae_frame_leave(_state);
    return result;
}
34954 
34955 
34956 /*************************************************************************
34957 Finding eigenvalues and eigenvectors of a general (unsymmetric) matrix
34958 
34959   ! COMMERCIAL EDITION OF ALGLIB:
34960   !
34961   ! Commercial Edition of ALGLIB includes following important improvements
34962   ! of this function:
34963   ! * high-performance native backend with same C# interface (C# version)
34964   ! * hardware vendor (Intel) implementations of linear algebra primitives
34965   !   (C++ and C# versions, x86/x64 platform)
34966   !
34967   ! We recommend you to read 'Working with commercial version' section  of
34968   ! ALGLIB Reference Manual in order to find out how to  use  performance-
34969   ! related features provided by commercial edition of ALGLIB.
34970 
34971 The algorithm finds eigenvalues and eigenvectors of a general matrix by
34972 using the QR algorithm with multiple shifts. The algorithm can find
34973 eigenvalues and both left and right eigenvectors.
34974 
34975 The right eigenvector is a vector x such that A*x = w*x, and the left
34976 eigenvector is a vector y such that y'*A = w*y' (here y' implies a complex
34977 conjugate transposition of vector y).
34978 
34979 Input parameters:
34980     A       -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
34981     N       -   size of matrix A.
34982     VNeeded -   flag controlling whether eigenvectors are needed or not.
34983                 If VNeeded is equal to:
34984                  * 0, eigenvectors are not returned;
34985                  * 1, right eigenvectors are returned;
34986                  * 2, left eigenvectors are returned;
34987                  * 3, both left and right eigenvectors are returned.
34988 
34989 Output parameters:
34990     WR      -   real parts of eigenvalues.
34991                 Array whose index ranges within [0..N-1].
    WI      -   imaginary parts of eigenvalues.
                Array whose index ranges within [0..N-1].
34994     VL, VR  -   arrays of left and right eigenvectors (if they are needed).
34995                 If WI[i]=0, the respective eigenvalue is a real number,
34996                 and it corresponds to the column number I of matrices VL/VR.
34997                 If WI[i]>0, we have a pair of complex conjugate numbers with
34998                 positive and negative imaginary parts:
34999                     the first eigenvalue WR[i] + sqrt(-1)*WI[i];
35000                     the second eigenvalue WR[i+1] + sqrt(-1)*WI[i+1];
35001                     WI[i]>0
35002                     WI[i+1] = -WI[i] < 0
35003                 In that case, the eigenvector  corresponding to the first
35004                 eigenvalue is located in i and i+1 columns of matrices
35005                 VL/VR (the column number i contains the real part, and the
35006                 column number i+1 contains the imaginary part), and the vector
35007                 corresponding to the second eigenvalue is a complex conjugate to
35008                 the first vector.
35009                 Arrays whose indexes range within [0..N-1, 0..N-1].
35010 
35011 Result:
35012     True, if the algorithm has converged.
35013     False, if the algorithm has not converged.
35014 
35015 Note 1:
35016     Some users may ask the following question: what if WI[N-1]>0?
35017     WI[N] must contain an eigenvalue which is complex conjugate to the
35018     N-th eigenvalue, but the array has only size N?
35019     The answer is as follows: such a situation cannot occur because the
    algorithm finds eigenvalues in pairs, therefore, if WI[i]>0, I is
35021     strictly less than N-1.
35022 
35023 Note 2:
35024     The algorithm performance depends on the value of the internal parameter
35025     NS of the InternalSchurDecomposition subroutine which defines the number
35026     of shifts in the QR algorithm (similarly to the block width in block-matrix
35027     algorithms of linear algebra). If you require maximum performance
35028     on your machine, it is recommended to adjust this parameter manually.
35029 
35030 
35031 See also the InternalTREVC subroutine.
35032 
35033 The algorithm is based on the LAPACK 3.0 library.
35034 *************************************************************************/
rmatrixevd(ae_matrix * a,ae_int_t n,ae_int_t vneeded,ae_vector * wr,ae_vector * wi,ae_matrix * vl,ae_matrix * vr,ae_state * _state)35035 ae_bool rmatrixevd(/* Real    */ ae_matrix* a,
35036      ae_int_t n,
35037      ae_int_t vneeded,
35038      /* Real    */ ae_vector* wr,
35039      /* Real    */ ae_vector* wi,
35040      /* Real    */ ae_matrix* vl,
35041      /* Real    */ ae_matrix* vr,
35042      ae_state *_state)
35043 {
35044     ae_frame _frame_block;
35045     ae_matrix _a;
35046     ae_matrix a1;
35047     ae_matrix vl1;
35048     ae_matrix vr1;
35049     ae_matrix s1;
35050     ae_matrix s;
35051     ae_matrix dummy;
35052     ae_vector wr1;
35053     ae_vector wi1;
35054     ae_vector tau;
35055     ae_int_t i;
35056     ae_int_t info;
35057     ae_vector sel1;
35058     ae_int_t m1;
35059     ae_bool result;
35060 
35061     ae_frame_make(_state, &_frame_block);
35062     memset(&_a, 0, sizeof(_a));
35063     memset(&a1, 0, sizeof(a1));
35064     memset(&vl1, 0, sizeof(vl1));
35065     memset(&vr1, 0, sizeof(vr1));
35066     memset(&s1, 0, sizeof(s1));
35067     memset(&s, 0, sizeof(s));
35068     memset(&dummy, 0, sizeof(dummy));
35069     memset(&wr1, 0, sizeof(wr1));
35070     memset(&wi1, 0, sizeof(wi1));
35071     memset(&tau, 0, sizeof(tau));
35072     memset(&sel1, 0, sizeof(sel1));
35073     ae_matrix_init_copy(&_a, a, _state, ae_true);
35074     a = &_a;
35075     ae_vector_clear(wr);
35076     ae_vector_clear(wi);
35077     ae_matrix_clear(vl);
35078     ae_matrix_clear(vr);
35079     ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
35080     ae_matrix_init(&vl1, 0, 0, DT_REAL, _state, ae_true);
35081     ae_matrix_init(&vr1, 0, 0, DT_REAL, _state, ae_true);
35082     ae_matrix_init(&s1, 0, 0, DT_REAL, _state, ae_true);
35083     ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
35084     ae_matrix_init(&dummy, 0, 0, DT_REAL, _state, ae_true);
35085     ae_vector_init(&wr1, 0, DT_REAL, _state, ae_true);
35086     ae_vector_init(&wi1, 0, DT_REAL, _state, ae_true);
35087     ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
35088     ae_vector_init(&sel1, 0, DT_BOOL, _state, ae_true);
35089 
35090     ae_assert(vneeded>=0&&vneeded<=3, "RMatrixEVD: incorrect VNeeded!", _state);
35091     if( vneeded==0 )
35092     {
35093 
35094         /*
35095          * Eigen values only
35096          */
35097         rmatrixhessenberg(a, n, &tau, _state);
35098         rmatrixinternalschurdecomposition(a, n, 0, 0, wr, wi, &dummy, &info, _state);
35099         result = info==0;
35100         ae_frame_leave(_state);
35101         return result;
35102     }
35103 
35104     /*
35105      * Eigen values and vectors
35106      */
35107     rmatrixhessenberg(a, n, &tau, _state);
35108     rmatrixhessenbergunpackq(a, n, &tau, &s, _state);
35109     rmatrixinternalschurdecomposition(a, n, 1, 1, wr, wi, &s, &info, _state);
35110     result = info==0;
35111     if( !result )
35112     {
35113         ae_frame_leave(_state);
35114         return result;
35115     }
35116     if( vneeded==1||vneeded==3 )
35117     {
35118         ae_matrix_set_length(vr, n, n, _state);
35119         for(i=0; i<=n-1; i++)
35120         {
35121             ae_v_move(&vr->ptr.pp_double[i][0], 1, &s.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
35122         }
35123     }
35124     if( vneeded==2||vneeded==3 )
35125     {
35126         ae_matrix_set_length(vl, n, n, _state);
35127         for(i=0; i<=n-1; i++)
35128         {
35129             ae_v_move(&vl->ptr.pp_double[i][0], 1, &s.ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
35130         }
35131     }
35132     evd_rmatrixinternaltrevc(a, n, vneeded, 1, &sel1, vl, vr, &m1, &info, _state);
35133     result = info==0;
35134     ae_frame_leave(_state);
35135     return result;
35136 }
35137 
35138 
35139 /*************************************************************************
Clears request fields (to be sure that we don't forget to clear something)
35141 *************************************************************************/
evd_clearrfields(eigsubspacestate * state,ae_state * _state)35142 static void evd_clearrfields(eigsubspacestate* state, ae_state *_state)
35143 {
35144 
35145 
35146     state->requesttype = -1;
35147     state->requestsize = -1;
35148 }
35149 
35150 
evd_tridiagonalevd(ae_vector * d,ae_vector * e,ae_int_t n,ae_int_t zneeded,ae_matrix * z,ae_state * _state)35151 static ae_bool evd_tridiagonalevd(/* Real    */ ae_vector* d,
35152      /* Real    */ ae_vector* e,
35153      ae_int_t n,
35154      ae_int_t zneeded,
35155      /* Real    */ ae_matrix* z,
35156      ae_state *_state)
35157 {
35158     ae_frame _frame_block;
35159     ae_vector _e;
35160     ae_int_t maxit;
35161     ae_int_t i;
35162     ae_int_t ii;
35163     ae_int_t iscale;
35164     ae_int_t j;
35165     ae_int_t jtot;
35166     ae_int_t k;
35167     ae_int_t t;
35168     ae_int_t l;
35169     ae_int_t l1;
35170     ae_int_t lend;
35171     ae_int_t lendm1;
35172     ae_int_t lendp1;
35173     ae_int_t lendsv;
35174     ae_int_t lm1;
35175     ae_int_t lsv;
35176     ae_int_t m;
35177     ae_int_t mm1;
35178     ae_int_t nm1;
35179     ae_int_t nmaxit;
35180     ae_int_t tmpint;
35181     double anorm;
35182     double b;
35183     double c;
35184     double eps;
35185     double eps2;
35186     double f;
35187     double g;
35188     double p;
35189     double r;
35190     double rt1;
35191     double rt2;
35192     double s;
35193     double safmax;
35194     double safmin;
35195     double ssfmax;
35196     double ssfmin;
35197     double tst;
35198     double tmp;
35199     ae_vector work1;
35200     ae_vector work2;
35201     ae_vector workc;
35202     ae_vector works;
35203     ae_vector wtemp;
35204     ae_bool gotoflag;
35205     ae_int_t zrows;
35206     ae_bool wastranspose;
35207     ae_bool result;
35208 
35209     ae_frame_make(_state, &_frame_block);
35210     memset(&_e, 0, sizeof(_e));
35211     memset(&work1, 0, sizeof(work1));
35212     memset(&work2, 0, sizeof(work2));
35213     memset(&workc, 0, sizeof(workc));
35214     memset(&works, 0, sizeof(works));
35215     memset(&wtemp, 0, sizeof(wtemp));
35216     ae_vector_init_copy(&_e, e, _state, ae_true);
35217     e = &_e;
35218     ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
35219     ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
35220     ae_vector_init(&workc, 0, DT_REAL, _state, ae_true);
35221     ae_vector_init(&works, 0, DT_REAL, _state, ae_true);
35222     ae_vector_init(&wtemp, 0, DT_REAL, _state, ae_true);
35223 
35224     ae_assert(zneeded>=0&&zneeded<=3, "TridiagonalEVD: Incorrent ZNeeded", _state);
35225 
35226     /*
35227      * Quick return if possible
35228      */
35229     if( zneeded<0||zneeded>3 )
35230     {
35231         result = ae_false;
35232         ae_frame_leave(_state);
35233         return result;
35234     }
35235     result = ae_true;
35236     if( n==0 )
35237     {
35238         ae_frame_leave(_state);
35239         return result;
35240     }
35241     if( n==1 )
35242     {
35243         if( zneeded==2||zneeded==3 )
35244         {
35245             ae_matrix_set_length(z, 1+1, 1+1, _state);
35246             z->ptr.pp_double[1][1] = (double)(1);
35247         }
35248         ae_frame_leave(_state);
35249         return result;
35250     }
35251     maxit = 30;
35252 
35253     /*
35254      * Initialize arrays
35255      */
35256     ae_vector_set_length(&wtemp, n+1, _state);
35257     ae_vector_set_length(&work1, n-1+1, _state);
35258     ae_vector_set_length(&work2, n-1+1, _state);
35259     ae_vector_set_length(&workc, n+1, _state);
35260     ae_vector_set_length(&works, n+1, _state);
35261 
35262     /*
35263      * Determine the unit roundoff and over/underflow thresholds.
35264      */
35265     eps = ae_machineepsilon;
35266     eps2 = ae_sqr(eps, _state);
35267     safmin = ae_minrealnumber;
35268     safmax = ae_maxrealnumber;
35269     ssfmax = ae_sqrt(safmax, _state)/3;
35270     ssfmin = ae_sqrt(safmin, _state)/eps2;
35271 
35272     /*
35273      * Prepare Z
35274      *
35275      * Here we are using transposition to get rid of column operations
35276      *
35277      */
35278     wastranspose = ae_false;
35279     zrows = 0;
35280     if( zneeded==1 )
35281     {
35282         zrows = n;
35283     }
35284     if( zneeded==2 )
35285     {
35286         zrows = n;
35287     }
35288     if( zneeded==3 )
35289     {
35290         zrows = 1;
35291     }
35292     if( zneeded==1 )
35293     {
35294         wastranspose = ae_true;
35295         inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
35296     }
35297     if( zneeded==2 )
35298     {
35299         wastranspose = ae_true;
35300         ae_matrix_set_length(z, n+1, n+1, _state);
35301         for(i=1; i<=n; i++)
35302         {
35303             for(j=1; j<=n; j++)
35304             {
35305                 if( i==j )
35306                 {
35307                     z->ptr.pp_double[i][j] = (double)(1);
35308                 }
35309                 else
35310                 {
35311                     z->ptr.pp_double[i][j] = (double)(0);
35312                 }
35313             }
35314         }
35315     }
35316     if( zneeded==3 )
35317     {
35318         wastranspose = ae_false;
35319         ae_matrix_set_length(z, 1+1, n+1, _state);
35320         for(j=1; j<=n; j++)
35321         {
35322             if( j==1 )
35323             {
35324                 z->ptr.pp_double[1][j] = (double)(1);
35325             }
35326             else
35327             {
35328                 z->ptr.pp_double[1][j] = (double)(0);
35329             }
35330         }
35331     }
35332     nmaxit = n*maxit;
35333     jtot = 0;
35334 
35335     /*
35336      * Determine where the matrix splits and choose QL or QR iteration
35337      * for each block, according to whether top or bottom diagonal
35338      * element is smaller.
35339      */
35340     l1 = 1;
35341     nm1 = n-1;
35342     for(;;)
35343     {
35344         if( l1>n )
35345         {
35346             break;
35347         }
35348         if( l1>1 )
35349         {
35350             e->ptr.p_double[l1-1] = (double)(0);
35351         }
35352         gotoflag = ae_false;
35353         m = l1;
35354         if( l1<=nm1 )
35355         {
35356             for(m=l1; m<=nm1; m++)
35357             {
35358                 tst = ae_fabs(e->ptr.p_double[m], _state);
35359                 if( ae_fp_eq(tst,(double)(0)) )
35360                 {
35361                     gotoflag = ae_true;
35362                     break;
35363                 }
35364                 if( ae_fp_less_eq(tst,ae_sqrt(ae_fabs(d->ptr.p_double[m], _state), _state)*ae_sqrt(ae_fabs(d->ptr.p_double[m+1], _state), _state)*eps) )
35365                 {
35366                     e->ptr.p_double[m] = (double)(0);
35367                     gotoflag = ae_true;
35368                     break;
35369                 }
35370             }
35371         }
35372         if( !gotoflag )
35373         {
35374             m = n;
35375         }
35376 
35377         /*
35378          * label 30:
35379          */
35380         l = l1;
35381         lsv = l;
35382         lend = m;
35383         lendsv = lend;
35384         l1 = m+1;
35385         if( lend==l )
35386         {
35387             continue;
35388         }
35389 
35390         /*
35391          * Scale submatrix in rows and columns L to LEND
35392          */
35393         if( l==lend )
35394         {
35395             anorm = ae_fabs(d->ptr.p_double[l], _state);
35396         }
35397         else
35398         {
35399             anorm = ae_maxreal(ae_fabs(d->ptr.p_double[l], _state)+ae_fabs(e->ptr.p_double[l], _state), ae_fabs(e->ptr.p_double[lend-1], _state)+ae_fabs(d->ptr.p_double[lend], _state), _state);
35400             for(i=l+1; i<=lend-1; i++)
35401             {
35402                 anorm = ae_maxreal(anorm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state), _state);
35403             }
35404         }
35405         iscale = 0;
35406         if( ae_fp_eq(anorm,(double)(0)) )
35407         {
35408             continue;
35409         }
35410         if( ae_fp_greater(anorm,ssfmax) )
35411         {
35412             iscale = 1;
35413             tmp = ssfmax/anorm;
35414             tmpint = lend-1;
35415             ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp);
35416             ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp);
35417         }
35418         if( ae_fp_less(anorm,ssfmin) )
35419         {
35420             iscale = 2;
35421             tmp = ssfmin/anorm;
35422             tmpint = lend-1;
35423             ae_v_muld(&d->ptr.p_double[l], 1, ae_v_len(l,lend), tmp);
35424             ae_v_muld(&e->ptr.p_double[l], 1, ae_v_len(l,tmpint), tmp);
35425         }
35426 
35427         /*
35428          * Choose between QL and QR iteration
35429          */
35430         if( ae_fp_less(ae_fabs(d->ptr.p_double[lend], _state),ae_fabs(d->ptr.p_double[l], _state)) )
35431         {
35432             lend = lsv;
35433             l = lendsv;
35434         }
35435         if( lend>l )
35436         {
35437 
35438             /*
35439              * QL Iteration
35440              *
35441              * Look for small subdiagonal element.
35442              */
35443             for(;;)
35444             {
35445                 gotoflag = ae_false;
35446                 if( l!=lend )
35447                 {
35448                     lendm1 = lend-1;
35449                     for(m=l; m<=lendm1; m++)
35450                     {
35451                         tst = ae_sqr(ae_fabs(e->ptr.p_double[m], _state), _state);
35452                         if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m+1], _state)+safmin) )
35453                         {
35454                             gotoflag = ae_true;
35455                             break;
35456                         }
35457                     }
35458                 }
35459                 if( !gotoflag )
35460                 {
35461                     m = lend;
35462                 }
35463                 if( m<lend )
35464                 {
35465                     e->ptr.p_double[m] = (double)(0);
35466                 }
35467                 p = d->ptr.p_double[l];
35468                 if( m!=l )
35469                 {
35470 
35471                     /*
35472                      * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
35473                      * to compute its eigensystem.
35474                      */
35475                     if( m==l+1 )
35476                     {
35477                         if( zneeded>0 )
35478                         {
35479                             evd_tdevdev2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, &c, &s, _state);
35480                             work1.ptr.p_double[l] = c;
35481                             work2.ptr.p_double[l] = s;
35482                             workc.ptr.p_double[1] = work1.ptr.p_double[l];
35483                             works.ptr.p_double[1] = work2.ptr.p_double[l];
35484                             if( !wastranspose )
35485                             {
35486                                 applyrotationsfromtheright(ae_false, 1, zrows, l, l+1, &workc, &works, z, &wtemp, _state);
35487                             }
35488                             else
35489                             {
35490                                 applyrotationsfromtheleft(ae_false, l, l+1, 1, zrows, &workc, &works, z, &wtemp, _state);
35491                             }
35492                         }
35493                         else
35494                         {
35495                             evd_tdevde2(d->ptr.p_double[l], e->ptr.p_double[l], d->ptr.p_double[l+1], &rt1, &rt2, _state);
35496                         }
35497                         d->ptr.p_double[l] = rt1;
35498                         d->ptr.p_double[l+1] = rt2;
35499                         e->ptr.p_double[l] = (double)(0);
35500                         l = l+2;
35501                         if( l<=lend )
35502                         {
35503                             continue;
35504                         }
35505 
35506                         /*
35507                          * GOTO 140
35508                          */
35509                         break;
35510                     }
35511                     if( jtot==nmaxit )
35512                     {
35513 
35514                         /*
35515                          * GOTO 140
35516                          */
35517                         break;
35518                     }
35519                     jtot = jtot+1;
35520 
35521                     /*
35522                      * Form shift.
35523                      */
35524                     g = (d->ptr.p_double[l+1]-p)/(2*e->ptr.p_double[l]);
35525                     r = evd_tdevdpythag(g, (double)(1), _state);
35526                     g = d->ptr.p_double[m]-p+e->ptr.p_double[l]/(g+evd_tdevdextsign(r, g, _state));
35527                     s = (double)(1);
35528                     c = (double)(1);
35529                     p = (double)(0);
35530 
35531                     /*
35532                      * Inner loop
35533                      */
35534                     mm1 = m-1;
35535                     for(i=mm1; i>=l; i--)
35536                     {
35537                         f = s*e->ptr.p_double[i];
35538                         b = c*e->ptr.p_double[i];
35539                         generaterotation(g, f, &c, &s, &r, _state);
35540                         if( i!=m-1 )
35541                         {
35542                             e->ptr.p_double[i+1] = r;
35543                         }
35544                         g = d->ptr.p_double[i+1]-p;
35545                         r = (d->ptr.p_double[i]-g)*s+2*c*b;
35546                         p = s*r;
35547                         d->ptr.p_double[i+1] = g+p;
35548                         g = c*r-b;
35549 
35550                         /*
35551                          * If eigenvectors are desired, then save rotations.
35552                          */
35553                         if( zneeded>0 )
35554                         {
35555                             work1.ptr.p_double[i] = c;
35556                             work2.ptr.p_double[i] = -s;
35557                         }
35558                     }
35559 
35560                     /*
35561                      * If eigenvectors are desired, then apply saved rotations.
35562                      */
35563                     if( zneeded>0 )
35564                     {
35565                         for(i=l; i<=m-1; i++)
35566                         {
35567                             workc.ptr.p_double[i-l+1] = work1.ptr.p_double[i];
35568                             works.ptr.p_double[i-l+1] = work2.ptr.p_double[i];
35569                         }
35570                         if( !wastranspose )
35571                         {
35572                             applyrotationsfromtheright(ae_false, 1, zrows, l, m, &workc, &works, z, &wtemp, _state);
35573                         }
35574                         else
35575                         {
35576                             applyrotationsfromtheleft(ae_false, l, m, 1, zrows, &workc, &works, z, &wtemp, _state);
35577                         }
35578                     }
35579                     d->ptr.p_double[l] = d->ptr.p_double[l]-p;
35580                     e->ptr.p_double[l] = g;
35581                     continue;
35582                 }
35583 
35584                 /*
35585                  * Eigenvalue found.
35586                  */
35587                 d->ptr.p_double[l] = p;
35588                 l = l+1;
35589                 if( l<=lend )
35590                 {
35591                     continue;
35592                 }
35593                 break;
35594             }
35595         }
35596         else
35597         {
35598 
35599             /*
35600              * QR Iteration
35601              *
35602              * Look for small superdiagonal element.
35603              */
35604             for(;;)
35605             {
35606                 gotoflag = ae_false;
35607                 if( l!=lend )
35608                 {
35609                     lendp1 = lend+1;
35610                     for(m=l; m>=lendp1; m--)
35611                     {
35612                         tst = ae_sqr(ae_fabs(e->ptr.p_double[m-1], _state), _state);
35613                         if( ae_fp_less_eq(tst,eps2*ae_fabs(d->ptr.p_double[m], _state)*ae_fabs(d->ptr.p_double[m-1], _state)+safmin) )
35614                         {
35615                             gotoflag = ae_true;
35616                             break;
35617                         }
35618                     }
35619                 }
35620                 if( !gotoflag )
35621                 {
35622                     m = lend;
35623                 }
35624                 if( m>lend )
35625                 {
35626                     e->ptr.p_double[m-1] = (double)(0);
35627                 }
35628                 p = d->ptr.p_double[l];
35629                 if( m!=l )
35630                 {
35631 
35632                     /*
35633                      * If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
35634                      * to compute its eigensystem.
35635                      */
35636                     if( m==l-1 )
35637                     {
35638                         if( zneeded>0 )
35639                         {
35640                             evd_tdevdev2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, &c, &s, _state);
35641                             work1.ptr.p_double[m] = c;
35642                             work2.ptr.p_double[m] = s;
35643                             workc.ptr.p_double[1] = c;
35644                             works.ptr.p_double[1] = s;
35645                             if( !wastranspose )
35646                             {
35647                                 applyrotationsfromtheright(ae_true, 1, zrows, l-1, l, &workc, &works, z, &wtemp, _state);
35648                             }
35649                             else
35650                             {
35651                                 applyrotationsfromtheleft(ae_true, l-1, l, 1, zrows, &workc, &works, z, &wtemp, _state);
35652                             }
35653                         }
35654                         else
35655                         {
35656                             evd_tdevde2(d->ptr.p_double[l-1], e->ptr.p_double[l-1], d->ptr.p_double[l], &rt1, &rt2, _state);
35657                         }
35658                         d->ptr.p_double[l-1] = rt1;
35659                         d->ptr.p_double[l] = rt2;
35660                         e->ptr.p_double[l-1] = (double)(0);
35661                         l = l-2;
35662                         if( l>=lend )
35663                         {
35664                             continue;
35665                         }
35666                         break;
35667                     }
35668                     if( jtot==nmaxit )
35669                     {
35670                         break;
35671                     }
35672                     jtot = jtot+1;
35673 
35674                     /*
35675                      * Form shift.
35676                      */
35677                     g = (d->ptr.p_double[l-1]-p)/(2*e->ptr.p_double[l-1]);
35678                     r = evd_tdevdpythag(g, (double)(1), _state);
35679                     g = d->ptr.p_double[m]-p+e->ptr.p_double[l-1]/(g+evd_tdevdextsign(r, g, _state));
35680                     s = (double)(1);
35681                     c = (double)(1);
35682                     p = (double)(0);
35683 
35684                     /*
35685                      * Inner loop
35686                      */
35687                     lm1 = l-1;
35688                     for(i=m; i<=lm1; i++)
35689                     {
35690                         f = s*e->ptr.p_double[i];
35691                         b = c*e->ptr.p_double[i];
35692                         generaterotation(g, f, &c, &s, &r, _state);
35693                         if( i!=m )
35694                         {
35695                             e->ptr.p_double[i-1] = r;
35696                         }
35697                         g = d->ptr.p_double[i]-p;
35698                         r = (d->ptr.p_double[i+1]-g)*s+2*c*b;
35699                         p = s*r;
35700                         d->ptr.p_double[i] = g+p;
35701                         g = c*r-b;
35702 
35703                         /*
35704                          * If eigenvectors are desired, then save rotations.
35705                          */
35706                         if( zneeded>0 )
35707                         {
35708                             work1.ptr.p_double[i] = c;
35709                             work2.ptr.p_double[i] = s;
35710                         }
35711                     }
35712 
35713                     /*
35714                      * If eigenvectors are desired, then apply saved rotations.
35715                      */
35716                     if( zneeded>0 )
35717                     {
35718                         for(i=m; i<=l-1; i++)
35719                         {
35720                             workc.ptr.p_double[i-m+1] = work1.ptr.p_double[i];
35721                             works.ptr.p_double[i-m+1] = work2.ptr.p_double[i];
35722                         }
35723                         if( !wastranspose )
35724                         {
35725                             applyrotationsfromtheright(ae_true, 1, zrows, m, l, &workc, &works, z, &wtemp, _state);
35726                         }
35727                         else
35728                         {
35729                             applyrotationsfromtheleft(ae_true, m, l, 1, zrows, &workc, &works, z, &wtemp, _state);
35730                         }
35731                     }
35732                     d->ptr.p_double[l] = d->ptr.p_double[l]-p;
35733                     e->ptr.p_double[lm1] = g;
35734                     continue;
35735                 }
35736 
35737                 /*
35738                  * Eigenvalue found.
35739                  */
35740                 d->ptr.p_double[l] = p;
35741                 l = l-1;
35742                 if( l>=lend )
35743                 {
35744                     continue;
35745                 }
35746                 break;
35747             }
35748         }
35749 
35750         /*
35751          * Undo scaling if necessary
35752          */
35753         if( iscale==1 )
35754         {
35755             tmp = anorm/ssfmax;
35756             tmpint = lendsv-1;
35757             ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp);
35758             ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp);
35759         }
35760         if( iscale==2 )
35761         {
35762             tmp = anorm/ssfmin;
35763             tmpint = lendsv-1;
35764             ae_v_muld(&d->ptr.p_double[lsv], 1, ae_v_len(lsv,lendsv), tmp);
35765             ae_v_muld(&e->ptr.p_double[lsv], 1, ae_v_len(lsv,tmpint), tmp);
35766         }
35767 
35768         /*
35769          * Check for no convergence to an eigenvalue after a total
35770          * of N*MAXIT iterations.
35771          */
35772         if( jtot>=nmaxit )
35773         {
35774             result = ae_false;
35775             if( wastranspose )
35776             {
35777                 inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
35778             }
35779             ae_frame_leave(_state);
35780             return result;
35781         }
35782     }
35783 
35784     /*
35785      * Order eigenvalues and eigenvectors.
35786      */
35787     if( zneeded==0 )
35788     {
35789 
35790         /*
35791          * Sort
35792          */
35793         if( n==1 )
35794         {
35795             ae_frame_leave(_state);
35796             return result;
35797         }
35798         if( n==2 )
35799         {
35800             if( ae_fp_greater(d->ptr.p_double[1],d->ptr.p_double[2]) )
35801             {
35802                 tmp = d->ptr.p_double[1];
35803                 d->ptr.p_double[1] = d->ptr.p_double[2];
35804                 d->ptr.p_double[2] = tmp;
35805             }
35806             ae_frame_leave(_state);
35807             return result;
35808         }
35809         i = 2;
35810         do
35811         {
35812             t = i;
35813             while(t!=1)
35814             {
35815                 k = t/2;
35816                 if( ae_fp_greater_eq(d->ptr.p_double[k],d->ptr.p_double[t]) )
35817                 {
35818                     t = 1;
35819                 }
35820                 else
35821                 {
35822                     tmp = d->ptr.p_double[k];
35823                     d->ptr.p_double[k] = d->ptr.p_double[t];
35824                     d->ptr.p_double[t] = tmp;
35825                     t = k;
35826                 }
35827             }
35828             i = i+1;
35829         }
35830         while(i<=n);
35831         i = n-1;
35832         do
35833         {
35834             tmp = d->ptr.p_double[i+1];
35835             d->ptr.p_double[i+1] = d->ptr.p_double[1];
35836             d->ptr.p_double[1] = tmp;
35837             t = 1;
35838             while(t!=0)
35839             {
35840                 k = 2*t;
35841                 if( k>i )
35842                 {
35843                     t = 0;
35844                 }
35845                 else
35846                 {
35847                     if( k<i )
35848                     {
35849                         if( ae_fp_greater(d->ptr.p_double[k+1],d->ptr.p_double[k]) )
35850                         {
35851                             k = k+1;
35852                         }
35853                     }
35854                     if( ae_fp_greater_eq(d->ptr.p_double[t],d->ptr.p_double[k]) )
35855                     {
35856                         t = 0;
35857                     }
35858                     else
35859                     {
35860                         tmp = d->ptr.p_double[k];
35861                         d->ptr.p_double[k] = d->ptr.p_double[t];
35862                         d->ptr.p_double[t] = tmp;
35863                         t = k;
35864                     }
35865                 }
35866             }
35867             i = i-1;
35868         }
35869         while(i>=1);
35870     }
35871     else
35872     {
35873 
35874         /*
35875          * Use Selection Sort to minimize swaps of eigenvectors
35876          */
35877         for(ii=2; ii<=n; ii++)
35878         {
35879             i = ii-1;
35880             k = i;
35881             p = d->ptr.p_double[i];
35882             for(j=ii; j<=n; j++)
35883             {
35884                 if( ae_fp_less(d->ptr.p_double[j],p) )
35885                 {
35886                     k = j;
35887                     p = d->ptr.p_double[j];
35888                 }
35889             }
35890             if( k!=i )
35891             {
35892                 d->ptr.p_double[k] = d->ptr.p_double[i];
35893                 d->ptr.p_double[i] = p;
35894                 if( wastranspose )
35895                 {
35896                     ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[i][1], 1, ae_v_len(1,n));
35897                     ae_v_move(&z->ptr.pp_double[i][1], 1, &z->ptr.pp_double[k][1], 1, ae_v_len(1,n));
35898                     ae_v_move(&z->ptr.pp_double[k][1], 1, &wtemp.ptr.p_double[1], 1, ae_v_len(1,n));
35899                 }
35900                 else
35901                 {
35902                     ae_v_move(&wtemp.ptr.p_double[1], 1, &z->ptr.pp_double[1][i], z->stride, ae_v_len(1,zrows));
35903                     ae_v_move(&z->ptr.pp_double[1][i], z->stride, &z->ptr.pp_double[1][k], z->stride, ae_v_len(1,zrows));
35904                     ae_v_move(&z->ptr.pp_double[1][k], z->stride, &wtemp.ptr.p_double[1], 1, ae_v_len(1,zrows));
35905                 }
35906             }
35907         }
35908         if( wastranspose )
35909         {
35910             inplacetranspose(z, 1, n, 1, n, &wtemp, _state);
35911         }
35912     }
35913     ae_frame_leave(_state);
35914     return result;
35915 }
35916 
35917 
35918 /*************************************************************************
35919 DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
35920    [  A   B  ]
35921    [  B   C  ].
35922 On return, RT1 is the eigenvalue of larger absolute value, and RT2
35923 is the eigenvalue of smaller absolute value.
35924 
35925   -- LAPACK auxiliary routine (version 3.0) --
35926      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
35927      Courant Institute, Argonne National Lab, and Rice University
35928      October 31, 1992
35929 *************************************************************************/
evd_tdevde2(double a,double b,double c,double * rt1,double * rt2,ae_state * _state)35930 static void evd_tdevde2(double a,
35931      double b,
35932      double c,
35933      double* rt1,
35934      double* rt2,
35935      ae_state *_state)
35936 {
35937     double ab;
35938     double acmn;
35939     double acmx;
35940     double adf;
35941     double df;
35942     double rt;
35943     double sm;
35944     double tb;
35945 
35946     *rt1 = 0;
35947     *rt2 = 0;
35948 
35949     sm = a+c;
35950     df = a-c;
35951     adf = ae_fabs(df, _state);
35952     tb = b+b;
35953     ab = ae_fabs(tb, _state);
35954     if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) )
35955     {
35956         acmx = a;
35957         acmn = c;
35958     }
35959     else
35960     {
35961         acmx = c;
35962         acmn = a;
35963     }
35964     if( ae_fp_greater(adf,ab) )
35965     {
35966         rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state);
35967     }
35968     else
35969     {
35970         if( ae_fp_less(adf,ab) )
35971         {
35972             rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state);
35973         }
35974         else
35975         {
35976 
35977             /*
35978              * Includes case AB=ADF=0
35979              */
35980             rt = ab*ae_sqrt((double)(2), _state);
35981         }
35982     }
35983     if( ae_fp_less(sm,(double)(0)) )
35984     {
35985         *rt1 = 0.5*(sm-rt);
35986 
35987         /*
35988          * Order of execution important.
35989          * To get fully accurate smaller eigenvalue,
35990          * next line needs to be executed in higher precision.
35991          */
35992         *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
35993     }
35994     else
35995     {
35996         if( ae_fp_greater(sm,(double)(0)) )
35997         {
35998             *rt1 = 0.5*(sm+rt);
35999 
36000             /*
36001              * Order of execution important.
36002              * To get fully accurate smaller eigenvalue,
36003              * next line needs to be executed in higher precision.
36004              */
36005             *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
36006         }
36007         else
36008         {
36009 
36010             /*
36011              * Includes case RT1 = RT2 = 0
36012              */
36013             *rt1 = 0.5*rt;
36014             *rt2 = -0.5*rt;
36015         }
36016     }
36017 }
36018 
36019 
36020 /*************************************************************************
36021 DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
36022 
36023    [  A   B  ]
36024    [  B   C  ].
36025 
36026 On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
36027 eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
36028 eigenvector for RT1, giving the decomposition
36029 
36030    [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
36031    [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
36032 
36033 
36034   -- LAPACK auxiliary routine (version 3.0) --
36035      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
36036      Courant Institute, Argonne National Lab, and Rice University
36037      October 31, 1992
36038 *************************************************************************/
evd_tdevdev2(double a,double b,double c,double * rt1,double * rt2,double * cs1,double * sn1,ae_state * _state)36039 static void evd_tdevdev2(double a,
36040      double b,
36041      double c,
36042      double* rt1,
36043      double* rt2,
36044      double* cs1,
36045      double* sn1,
36046      ae_state *_state)
36047 {
36048     ae_int_t sgn1;
36049     ae_int_t sgn2;
36050     double ab;
36051     double acmn;
36052     double acmx;
36053     double acs;
36054     double adf;
36055     double cs;
36056     double ct;
36057     double df;
36058     double rt;
36059     double sm;
36060     double tb;
36061     double tn;
36062 
36063     *rt1 = 0;
36064     *rt2 = 0;
36065     *cs1 = 0;
36066     *sn1 = 0;
36067 
36068 
36069     /*
36070      * Compute the eigenvalues
36071      */
36072     sm = a+c;
36073     df = a-c;
36074     adf = ae_fabs(df, _state);
36075     tb = b+b;
36076     ab = ae_fabs(tb, _state);
36077     if( ae_fp_greater(ae_fabs(a, _state),ae_fabs(c, _state)) )
36078     {
36079         acmx = a;
36080         acmn = c;
36081     }
36082     else
36083     {
36084         acmx = c;
36085         acmn = a;
36086     }
36087     if( ae_fp_greater(adf,ab) )
36088     {
36089         rt = adf*ae_sqrt(1+ae_sqr(ab/adf, _state), _state);
36090     }
36091     else
36092     {
36093         if( ae_fp_less(adf,ab) )
36094         {
36095             rt = ab*ae_sqrt(1+ae_sqr(adf/ab, _state), _state);
36096         }
36097         else
36098         {
36099 
36100             /*
36101              * Includes case AB=ADF=0
36102              */
36103             rt = ab*ae_sqrt((double)(2), _state);
36104         }
36105     }
36106     if( ae_fp_less(sm,(double)(0)) )
36107     {
36108         *rt1 = 0.5*(sm-rt);
36109         sgn1 = -1;
36110 
36111         /*
36112          * Order of execution important.
36113          * To get fully accurate smaller eigenvalue,
36114          * next line needs to be executed in higher precision.
36115          */
36116         *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
36117     }
36118     else
36119     {
36120         if( ae_fp_greater(sm,(double)(0)) )
36121         {
36122             *rt1 = 0.5*(sm+rt);
36123             sgn1 = 1;
36124 
36125             /*
36126              * Order of execution important.
36127              * To get fully accurate smaller eigenvalue,
36128              * next line needs to be executed in higher precision.
36129              */
36130             *rt2 = acmx/(*rt1)*acmn-b/(*rt1)*b;
36131         }
36132         else
36133         {
36134 
36135             /*
36136              * Includes case RT1 = RT2 = 0
36137              */
36138             *rt1 = 0.5*rt;
36139             *rt2 = -0.5*rt;
36140             sgn1 = 1;
36141         }
36142     }
36143 
36144     /*
36145      * Compute the eigenvector
36146      */
36147     if( ae_fp_greater_eq(df,(double)(0)) )
36148     {
36149         cs = df+rt;
36150         sgn2 = 1;
36151     }
36152     else
36153     {
36154         cs = df-rt;
36155         sgn2 = -1;
36156     }
36157     acs = ae_fabs(cs, _state);
36158     if( ae_fp_greater(acs,ab) )
36159     {
36160         ct = -tb/cs;
36161         *sn1 = 1/ae_sqrt(1+ct*ct, _state);
36162         *cs1 = ct*(*sn1);
36163     }
36164     else
36165     {
36166         if( ae_fp_eq(ab,(double)(0)) )
36167         {
36168             *cs1 = (double)(1);
36169             *sn1 = (double)(0);
36170         }
36171         else
36172         {
36173             tn = -cs/tb;
36174             *cs1 = 1/ae_sqrt(1+tn*tn, _state);
36175             *sn1 = tn*(*cs1);
36176         }
36177     }
36178     if( sgn1==sgn2 )
36179     {
36180         tn = *cs1;
36181         *cs1 = -*sn1;
36182         *sn1 = tn;
36183     }
36184 }
36185 
36186 
36187 /*************************************************************************
36188 Internal routine
36189 *************************************************************************/
evd_tdevdpythag(double a,double b,ae_state * _state)36190 static double evd_tdevdpythag(double a, double b, ae_state *_state)
36191 {
36192     double result;
36193 
36194 
36195     if( ae_fp_less(ae_fabs(a, _state),ae_fabs(b, _state)) )
36196     {
36197         result = ae_fabs(b, _state)*ae_sqrt(1+ae_sqr(a/b, _state), _state);
36198     }
36199     else
36200     {
36201         result = ae_fabs(a, _state)*ae_sqrt(1+ae_sqr(b/a, _state), _state);
36202     }
36203     return result;
36204 }
36205 
36206 
36207 /*************************************************************************
36208 Internal routine
36209 *************************************************************************/
evd_tdevdextsign(double a,double b,ae_state * _state)36210 static double evd_tdevdextsign(double a, double b, ae_state *_state)
36211 {
36212     double result;
36213 
36214 
36215     if( ae_fp_greater_eq(b,(double)(0)) )
36216     {
36217         result = ae_fabs(a, _state);
36218     }
36219     else
36220     {
36221         result = -ae_fabs(a, _state);
36222     }
36223     return result;
36224 }
36225 
36226 
evd_internalbisectioneigenvalues(ae_vector * d,ae_vector * e,ae_int_t n,ae_int_t irange,ae_int_t iorder,double vl,double vu,ae_int_t il,ae_int_t iu,double abstol,ae_vector * w,ae_int_t * m,ae_int_t * nsplit,ae_vector * iblock,ae_vector * isplit,ae_int_t * errorcode,ae_state * _state)36227 static ae_bool evd_internalbisectioneigenvalues(/* Real    */ ae_vector* d,
36228      /* Real    */ ae_vector* e,
36229      ae_int_t n,
36230      ae_int_t irange,
36231      ae_int_t iorder,
36232      double vl,
36233      double vu,
36234      ae_int_t il,
36235      ae_int_t iu,
36236      double abstol,
36237      /* Real    */ ae_vector* w,
36238      ae_int_t* m,
36239      ae_int_t* nsplit,
36240      /* Integer */ ae_vector* iblock,
36241      /* Integer */ ae_vector* isplit,
36242      ae_int_t* errorcode,
36243      ae_state *_state)
36244 {
36245     ae_frame _frame_block;
36246     ae_vector _d;
36247     ae_vector _e;
36248     double fudge;
36249     double relfac;
36250     ae_bool ncnvrg;
36251     ae_bool toofew;
36252     ae_int_t ib;
36253     ae_int_t ibegin;
36254     ae_int_t idiscl;
36255     ae_int_t idiscu;
36256     ae_int_t ie;
36257     ae_int_t iend;
36258     ae_int_t iinfo;
36259     ae_int_t im;
36260     ae_int_t iin;
36261     ae_int_t ioff;
36262     ae_int_t iout;
36263     ae_int_t itmax;
36264     ae_int_t iw;
36265     ae_int_t iwoff;
36266     ae_int_t j;
36267     ae_int_t itmp1;
36268     ae_int_t jb;
36269     ae_int_t jdisc;
36270     ae_int_t je;
36271     ae_int_t nwl;
36272     ae_int_t nwu;
36273     double atoli;
36274     double bnorm;
36275     double gl;
36276     double gu;
36277     double pivmin;
36278     double rtoli;
36279     double safemn;
36280     double tmp1;
36281     double tmp2;
36282     double tnorm;
36283     double ulp;
36284     double wkill;
36285     double wl;
36286     double wlu;
36287     double wu;
36288     double wul;
36289     double scalefactor;
36290     double t;
36291     ae_vector idumma;
36292     ae_vector work;
36293     ae_vector iwork;
36294     ae_vector ia1s2;
36295     ae_vector ra1s2;
36296     ae_matrix ra1s2x2;
36297     ae_matrix ia1s2x2;
36298     ae_vector ra1siin;
36299     ae_vector ra2siin;
36300     ae_vector ra3siin;
36301     ae_vector ra4siin;
36302     ae_matrix ra1siinx2;
36303     ae_matrix ia1siinx2;
36304     ae_vector iworkspace;
36305     ae_vector rworkspace;
36306     ae_int_t tmpi;
36307     ae_bool result;
36308 
36309     ae_frame_make(_state, &_frame_block);
36310     memset(&_d, 0, sizeof(_d));
36311     memset(&_e, 0, sizeof(_e));
36312     memset(&idumma, 0, sizeof(idumma));
36313     memset(&work, 0, sizeof(work));
36314     memset(&iwork, 0, sizeof(iwork));
36315     memset(&ia1s2, 0, sizeof(ia1s2));
36316     memset(&ra1s2, 0, sizeof(ra1s2));
36317     memset(&ra1s2x2, 0, sizeof(ra1s2x2));
36318     memset(&ia1s2x2, 0, sizeof(ia1s2x2));
36319     memset(&ra1siin, 0, sizeof(ra1siin));
36320     memset(&ra2siin, 0, sizeof(ra2siin));
36321     memset(&ra3siin, 0, sizeof(ra3siin));
36322     memset(&ra4siin, 0, sizeof(ra4siin));
36323     memset(&ra1siinx2, 0, sizeof(ra1siinx2));
36324     memset(&ia1siinx2, 0, sizeof(ia1siinx2));
36325     memset(&iworkspace, 0, sizeof(iworkspace));
36326     memset(&rworkspace, 0, sizeof(rworkspace));
36327     ae_vector_init_copy(&_d, d, _state, ae_true);
36328     d = &_d;
36329     ae_vector_init_copy(&_e, e, _state, ae_true);
36330     e = &_e;
36331     ae_vector_clear(w);
36332     *m = 0;
36333     *nsplit = 0;
36334     ae_vector_clear(iblock);
36335     ae_vector_clear(isplit);
36336     *errorcode = 0;
36337     ae_vector_init(&idumma, 0, DT_INT, _state, ae_true);
36338     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
36339     ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
36340     ae_vector_init(&ia1s2, 0, DT_INT, _state, ae_true);
36341     ae_vector_init(&ra1s2, 0, DT_REAL, _state, ae_true);
36342     ae_matrix_init(&ra1s2x2, 0, 0, DT_REAL, _state, ae_true);
36343     ae_matrix_init(&ia1s2x2, 0, 0, DT_INT, _state, ae_true);
36344     ae_vector_init(&ra1siin, 0, DT_REAL, _state, ae_true);
36345     ae_vector_init(&ra2siin, 0, DT_REAL, _state, ae_true);
36346     ae_vector_init(&ra3siin, 0, DT_REAL, _state, ae_true);
36347     ae_vector_init(&ra4siin, 0, DT_REAL, _state, ae_true);
36348     ae_matrix_init(&ra1siinx2, 0, 0, DT_REAL, _state, ae_true);
36349     ae_matrix_init(&ia1siinx2, 0, 0, DT_INT, _state, ae_true);
36350     ae_vector_init(&iworkspace, 0, DT_INT, _state, ae_true);
36351     ae_vector_init(&rworkspace, 0, DT_REAL, _state, ae_true);
36352 
36353 
36354     /*
36355      * Quick return if possible
36356      */
36357     *m = 0;
36358     if( n==0 )
36359     {
36360         result = ae_true;
36361         ae_frame_leave(_state);
36362         return result;
36363     }
36364 
36365     /*
36366      * Get machine constants
36367      * NB is the minimum vector length for vector bisection, or 0
36368      * if only scalar is to be done.
36369      */
36370     fudge = (double)(2);
36371     relfac = (double)(2);
36372     safemn = ae_minrealnumber;
36373     ulp = 2*ae_machineepsilon;
36374     rtoli = ulp*relfac;
36375     ae_vector_set_length(&idumma, 1+1, _state);
36376     ae_vector_set_length(&work, 4*n+1, _state);
36377     ae_vector_set_length(&iwork, 3*n+1, _state);
36378     ae_vector_set_length(w, n+1, _state);
36379     ae_vector_set_length(iblock, n+1, _state);
36380     ae_vector_set_length(isplit, n+1, _state);
36381     ae_vector_set_length(&ia1s2, 2+1, _state);
36382     ae_vector_set_length(&ra1s2, 2+1, _state);
36383     ae_matrix_set_length(&ra1s2x2, 2+1, 2+1, _state);
36384     ae_matrix_set_length(&ia1s2x2, 2+1, 2+1, _state);
36385     ae_vector_set_length(&ra1siin, n+1, _state);
36386     ae_vector_set_length(&ra2siin, n+1, _state);
36387     ae_vector_set_length(&ra3siin, n+1, _state);
36388     ae_vector_set_length(&ra4siin, n+1, _state);
36389     ae_matrix_set_length(&ra1siinx2, n+1, 2+1, _state);
36390     ae_matrix_set_length(&ia1siinx2, n+1, 2+1, _state);
36391     ae_vector_set_length(&iworkspace, n+1, _state);
36392     ae_vector_set_length(&rworkspace, n+1, _state);
36393 
36394     /*
36395      * these initializers are not really necessary,
36396      * but without them compiler complains about uninitialized locals
36397      */
36398     wlu = (double)(0);
36399     wul = (double)(0);
36400 
36401     /*
36402      * Check for Errors
36403      */
36404     result = ae_false;
36405     *errorcode = 0;
36406     if( irange<=0||irange>=4 )
36407     {
36408         *errorcode = -4;
36409     }
36410     if( iorder<=0||iorder>=3 )
36411     {
36412         *errorcode = -5;
36413     }
36414     if( n<0 )
36415     {
36416         *errorcode = -3;
36417     }
36418     if( irange==2&&ae_fp_greater_eq(vl,vu) )
36419     {
36420         *errorcode = -6;
36421     }
36422     if( irange==3&&(il<1||il>ae_maxint(1, n, _state)) )
36423     {
36424         *errorcode = -8;
36425     }
36426     if( irange==3&&(iu<ae_minint(n, il, _state)||iu>n) )
36427     {
36428         *errorcode = -9;
36429     }
36430     if( *errorcode!=0 )
36431     {
36432         ae_frame_leave(_state);
36433         return result;
36434     }
36435 
36436     /*
36437      * Initialize error flags
36438      */
36439     ncnvrg = ae_false;
36440     toofew = ae_false;
36441 
36442     /*
36443      * Simplifications:
36444      */
36445     if( (irange==3&&il==1)&&iu==n )
36446     {
36447         irange = 1;
36448     }
36449 
36450     /*
36451      * Special Case when N=1
36452      */
36453     if( n==1 )
36454     {
36455         *nsplit = 1;
36456         isplit->ptr.p_int[1] = 1;
36457         if( irange==2&&(ae_fp_greater_eq(vl,d->ptr.p_double[1])||ae_fp_less(vu,d->ptr.p_double[1])) )
36458         {
36459             *m = 0;
36460         }
36461         else
36462         {
36463             w->ptr.p_double[1] = d->ptr.p_double[1];
36464             iblock->ptr.p_int[1] = 1;
36465             *m = 1;
36466         }
36467         result = ae_true;
36468         ae_frame_leave(_state);
36469         return result;
36470     }
36471 
36472     /*
36473      * Scaling
36474      */
36475     t = ae_fabs(d->ptr.p_double[n], _state);
36476     for(j=1; j<=n-1; j++)
36477     {
36478         t = ae_maxreal(t, ae_fabs(d->ptr.p_double[j], _state), _state);
36479         t = ae_maxreal(t, ae_fabs(e->ptr.p_double[j], _state), _state);
36480     }
36481     scalefactor = (double)(1);
36482     if( ae_fp_neq(t,(double)(0)) )
36483     {
36484         if( ae_fp_greater(t,ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state)*ae_sqrt(ae_maxrealnumber, _state)) )
36485         {
36486             scalefactor = t;
36487         }
36488         if( ae_fp_less(t,ae_sqrt(ae_sqrt(ae_maxrealnumber, _state), _state)*ae_sqrt(ae_minrealnumber, _state)) )
36489         {
36490             scalefactor = t;
36491         }
36492         for(j=1; j<=n-1; j++)
36493         {
36494             d->ptr.p_double[j] = d->ptr.p_double[j]/scalefactor;
36495             e->ptr.p_double[j] = e->ptr.p_double[j]/scalefactor;
36496         }
36497         d->ptr.p_double[n] = d->ptr.p_double[n]/scalefactor;
36498     }
36499 
36500     /*
36501      * Compute Splitting Points
36502      */
36503     *nsplit = 1;
36504     work.ptr.p_double[n] = (double)(0);
36505     pivmin = (double)(1);
36506     for(j=2; j<=n; j++)
36507     {
36508         tmp1 = ae_sqr(e->ptr.p_double[j-1], _state);
36509         if( ae_fp_greater(ae_fabs(d->ptr.p_double[j]*d->ptr.p_double[j-1], _state)*ae_sqr(ulp, _state)+safemn,tmp1) )
36510         {
36511             isplit->ptr.p_int[*nsplit] = j-1;
36512             *nsplit = *nsplit+1;
36513             work.ptr.p_double[j-1] = (double)(0);
36514         }
36515         else
36516         {
36517             work.ptr.p_double[j-1] = tmp1;
36518             pivmin = ae_maxreal(pivmin, tmp1, _state);
36519         }
36520     }
36521     isplit->ptr.p_int[*nsplit] = n;
36522     pivmin = pivmin*safemn;
36523 
36524     /*
36525      * Compute Interval and ATOLI
36526      */
36527     if( irange==3 )
36528     {
36529 
36530         /*
36531          * RANGE='I': Compute the interval containing eigenvalues
36532          *     IL through IU.
36533          *
36534          * Compute Gershgorin interval for entire (split) matrix
36535          * and use it as the initial interval
36536          */
36537         gu = d->ptr.p_double[1];
36538         gl = d->ptr.p_double[1];
36539         tmp1 = (double)(0);
36540         for(j=1; j<=n-1; j++)
36541         {
36542             tmp2 = ae_sqrt(work.ptr.p_double[j], _state);
36543             gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state);
36544             gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state);
36545             tmp1 = tmp2;
36546         }
36547         gu = ae_maxreal(gu, d->ptr.p_double[n]+tmp1, _state);
36548         gl = ae_minreal(gl, d->ptr.p_double[n]-tmp1, _state);
36549         tnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
36550         gl = gl-fudge*tnorm*ulp*n-fudge*2*pivmin;
36551         gu = gu+fudge*tnorm*ulp*n+fudge*pivmin;
36552 
36553         /*
36554          * Compute Iteration parameters
36555          */
36556         itmax = ae_iceil((ae_log(tnorm+pivmin, _state)-ae_log(pivmin, _state))/ae_log((double)(2), _state), _state)+2;
36557         if( ae_fp_less_eq(abstol,(double)(0)) )
36558         {
36559             atoli = ulp*tnorm;
36560         }
36561         else
36562         {
36563             atoli = abstol;
36564         }
36565         work.ptr.p_double[n+1] = gl;
36566         work.ptr.p_double[n+2] = gl;
36567         work.ptr.p_double[n+3] = gu;
36568         work.ptr.p_double[n+4] = gu;
36569         work.ptr.p_double[n+5] = gl;
36570         work.ptr.p_double[n+6] = gu;
36571         iwork.ptr.p_int[1] = -1;
36572         iwork.ptr.p_int[2] = -1;
36573         iwork.ptr.p_int[3] = n+1;
36574         iwork.ptr.p_int[4] = n+1;
36575         iwork.ptr.p_int[5] = il-1;
36576         iwork.ptr.p_int[6] = iu;
36577 
36578         /*
36579          * Calling DLAEBZ
36580          *
36581          * DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E,
36582          *    WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT,
36583          *    IWORK, W, IBLOCK, IINFO )
36584          */
36585         ia1s2.ptr.p_int[1] = iwork.ptr.p_int[5];
36586         ia1s2.ptr.p_int[2] = iwork.ptr.p_int[6];
36587         ra1s2.ptr.p_double[1] = work.ptr.p_double[n+5];
36588         ra1s2.ptr.p_double[2] = work.ptr.p_double[n+6];
36589         ra1s2x2.ptr.pp_double[1][1] = work.ptr.p_double[n+1];
36590         ra1s2x2.ptr.pp_double[2][1] = work.ptr.p_double[n+2];
36591         ra1s2x2.ptr.pp_double[1][2] = work.ptr.p_double[n+3];
36592         ra1s2x2.ptr.pp_double[2][2] = work.ptr.p_double[n+4];
36593         ia1s2x2.ptr.pp_int[1][1] = iwork.ptr.p_int[1];
36594         ia1s2x2.ptr.pp_int[2][1] = iwork.ptr.p_int[2];
36595         ia1s2x2.ptr.pp_int[1][2] = iwork.ptr.p_int[3];
36596         ia1s2x2.ptr.pp_int[2][2] = iwork.ptr.p_int[4];
36597         evd_internaldlaebz(3, itmax, n, 2, 2, atoli, rtoli, pivmin, d, e, &work, &ia1s2, &ra1s2x2, &ra1s2, &iout, &ia1s2x2, w, iblock, &iinfo, _state);
36598         iwork.ptr.p_int[5] = ia1s2.ptr.p_int[1];
36599         iwork.ptr.p_int[6] = ia1s2.ptr.p_int[2];
36600         work.ptr.p_double[n+5] = ra1s2.ptr.p_double[1];
36601         work.ptr.p_double[n+6] = ra1s2.ptr.p_double[2];
36602         work.ptr.p_double[n+1] = ra1s2x2.ptr.pp_double[1][1];
36603         work.ptr.p_double[n+2] = ra1s2x2.ptr.pp_double[2][1];
36604         work.ptr.p_double[n+3] = ra1s2x2.ptr.pp_double[1][2];
36605         work.ptr.p_double[n+4] = ra1s2x2.ptr.pp_double[2][2];
36606         iwork.ptr.p_int[1] = ia1s2x2.ptr.pp_int[1][1];
36607         iwork.ptr.p_int[2] = ia1s2x2.ptr.pp_int[2][1];
36608         iwork.ptr.p_int[3] = ia1s2x2.ptr.pp_int[1][2];
36609         iwork.ptr.p_int[4] = ia1s2x2.ptr.pp_int[2][2];
36610         if( iwork.ptr.p_int[6]==iu )
36611         {
36612             wl = work.ptr.p_double[n+1];
36613             wlu = work.ptr.p_double[n+3];
36614             nwl = iwork.ptr.p_int[1];
36615             wu = work.ptr.p_double[n+4];
36616             wul = work.ptr.p_double[n+2];
36617             nwu = iwork.ptr.p_int[4];
36618         }
36619         else
36620         {
36621             wl = work.ptr.p_double[n+2];
36622             wlu = work.ptr.p_double[n+4];
36623             nwl = iwork.ptr.p_int[2];
36624             wu = work.ptr.p_double[n+3];
36625             wul = work.ptr.p_double[n+1];
36626             nwu = iwork.ptr.p_int[3];
36627         }
36628         if( ((nwl<0||nwl>=n)||nwu<1)||nwu>n )
36629         {
36630             *errorcode = 4;
36631             result = ae_false;
36632             ae_frame_leave(_state);
36633             return result;
36634         }
36635     }
36636     else
36637     {
36638 
36639         /*
36640          * RANGE='A' or 'V' -- Set ATOLI
36641          */
36642         tnorm = ae_maxreal(ae_fabs(d->ptr.p_double[1], _state)+ae_fabs(e->ptr.p_double[1], _state), ae_fabs(d->ptr.p_double[n], _state)+ae_fabs(e->ptr.p_double[n-1], _state), _state);
36643         for(j=2; j<=n-1; j++)
36644         {
36645             tnorm = ae_maxreal(tnorm, ae_fabs(d->ptr.p_double[j], _state)+ae_fabs(e->ptr.p_double[j-1], _state)+ae_fabs(e->ptr.p_double[j], _state), _state);
36646         }
36647         if( ae_fp_less_eq(abstol,(double)(0)) )
36648         {
36649             atoli = ulp*tnorm;
36650         }
36651         else
36652         {
36653             atoli = abstol;
36654         }
36655         if( irange==2 )
36656         {
36657             wl = vl;
36658             wu = vu;
36659         }
36660         else
36661         {
36662             wl = (double)(0);
36663             wu = (double)(0);
36664         }
36665     }
36666 
36667     /*
36668      * Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU.
36669      * NWL accumulates the number of eigenvalues .le. WL,
36670      * NWU accumulates the number of eigenvalues .le. WU
36671      */
36672     *m = 0;
36673     iend = 0;
36674     *errorcode = 0;
36675     nwl = 0;
36676     nwu = 0;
36677     for(jb=1; jb<=*nsplit; jb++)
36678     {
36679         ioff = iend;
36680         ibegin = ioff+1;
36681         iend = isplit->ptr.p_int[jb];
36682         iin = iend-ioff;
36683         if( iin==1 )
36684         {
36685 
36686             /*
36687              * Special Case -- IIN=1
36688              */
36689             if( irange==1||ae_fp_greater_eq(wl,d->ptr.p_double[ibegin]-pivmin) )
36690             {
36691                 nwl = nwl+1;
36692             }
36693             if( irange==1||ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin) )
36694             {
36695                 nwu = nwu+1;
36696             }
36697             if( irange==1||(ae_fp_less(wl,d->ptr.p_double[ibegin]-pivmin)&&ae_fp_greater_eq(wu,d->ptr.p_double[ibegin]-pivmin)) )
36698             {
36699                 *m = *m+1;
36700                 w->ptr.p_double[*m] = d->ptr.p_double[ibegin];
36701                 iblock->ptr.p_int[*m] = jb;
36702             }
36703         }
36704         else
36705         {
36706 
36707             /*
36708              * General Case -- IIN > 1
36709              *
36710              * Compute Gershgorin Interval
36711              * and use it as the initial interval
36712              */
36713             gu = d->ptr.p_double[ibegin];
36714             gl = d->ptr.p_double[ibegin];
36715             tmp1 = (double)(0);
36716             for(j=ibegin; j<=iend-1; j++)
36717             {
36718                 tmp2 = ae_fabs(e->ptr.p_double[j], _state);
36719                 gu = ae_maxreal(gu, d->ptr.p_double[j]+tmp1+tmp2, _state);
36720                 gl = ae_minreal(gl, d->ptr.p_double[j]-tmp1-tmp2, _state);
36721                 tmp1 = tmp2;
36722             }
36723             gu = ae_maxreal(gu, d->ptr.p_double[iend]+tmp1, _state);
36724             gl = ae_minreal(gl, d->ptr.p_double[iend]-tmp1, _state);
36725             bnorm = ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
36726             gl = gl-fudge*bnorm*ulp*iin-fudge*pivmin;
36727             gu = gu+fudge*bnorm*ulp*iin+fudge*pivmin;
36728 
36729             /*
36730              * Compute ATOLI for the current submatrix
36731              */
36732             if( ae_fp_less_eq(abstol,(double)(0)) )
36733             {
36734                 atoli = ulp*ae_maxreal(ae_fabs(gl, _state), ae_fabs(gu, _state), _state);
36735             }
36736             else
36737             {
36738                 atoli = abstol;
36739             }
36740             if( irange>1 )
36741             {
36742                 if( ae_fp_less(gu,wl) )
36743                 {
36744                     nwl = nwl+iin;
36745                     nwu = nwu+iin;
36746                     continue;
36747                 }
36748                 gl = ae_maxreal(gl, wl, _state);
36749                 gu = ae_minreal(gu, wu, _state);
36750                 if( ae_fp_greater_eq(gl,gu) )
36751                 {
36752                     continue;
36753                 }
36754             }
36755 
36756             /*
36757              * Set Up Initial Interval
36758              */
36759             work.ptr.p_double[n+1] = gl;
36760             work.ptr.p_double[n+iin+1] = gu;
36761 
36762             /*
36763              * Calling DLAEBZ
36764              *
36765              * CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
36766              *    D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
36767              *    IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM,
36768              *    IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
36769              */
36770             for(tmpi=1; tmpi<=iin; tmpi++)
36771             {
36772                 ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi];
36773                 if( ibegin-1+tmpi<n )
36774                 {
36775                     ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi];
36776                 }
36777                 ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi];
36778                 ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi];
36779                 ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin];
36780                 ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi];
36781                 rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi];
36782                 iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi];
36783                 ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi];
36784                 ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin];
36785             }
36786             evd_internaldlaebz(1, 0, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &im, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state);
36787             for(tmpi=1; tmpi<=iin; tmpi++)
36788             {
36789                 work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1];
36790                 work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2];
36791                 work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi];
36792                 w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi];
36793                 iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi];
36794                 iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1];
36795                 iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2];
36796             }
36797             nwl = nwl+iwork.ptr.p_int[1];
36798             nwu = nwu+iwork.ptr.p_int[iin+1];
36799             iwoff = *m-iwork.ptr.p_int[1];
36800 
36801             /*
36802              * Compute Eigenvalues
36803              */
36804             itmax = ae_iceil((ae_log(gu-gl+pivmin, _state)-ae_log(pivmin, _state))/ae_log((double)(2), _state), _state)+2;
36805 
36806             /*
36807              * Calling DLAEBZ
36808              *
36809              *CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
36810              *    D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
36811              *    IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT,
36812              *    IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
36813              */
36814             for(tmpi=1; tmpi<=iin; tmpi++)
36815             {
36816                 ra1siin.ptr.p_double[tmpi] = d->ptr.p_double[ibegin-1+tmpi];
36817                 if( ibegin-1+tmpi<n )
36818                 {
36819                     ra2siin.ptr.p_double[tmpi] = e->ptr.p_double[ibegin-1+tmpi];
36820                 }
36821                 ra3siin.ptr.p_double[tmpi] = work.ptr.p_double[ibegin-1+tmpi];
36822                 ra1siinx2.ptr.pp_double[tmpi][1] = work.ptr.p_double[n+tmpi];
36823                 ra1siinx2.ptr.pp_double[tmpi][2] = work.ptr.p_double[n+tmpi+iin];
36824                 ra4siin.ptr.p_double[tmpi] = work.ptr.p_double[n+2*iin+tmpi];
36825                 rworkspace.ptr.p_double[tmpi] = w->ptr.p_double[*m+tmpi];
36826                 iworkspace.ptr.p_int[tmpi] = iblock->ptr.p_int[*m+tmpi];
36827                 ia1siinx2.ptr.pp_int[tmpi][1] = iwork.ptr.p_int[tmpi];
36828                 ia1siinx2.ptr.pp_int[tmpi][2] = iwork.ptr.p_int[tmpi+iin];
36829             }
36830             evd_internaldlaebz(2, itmax, iin, iin, 1, atoli, rtoli, pivmin, &ra1siin, &ra2siin, &ra3siin, &idumma, &ra1siinx2, &ra4siin, &iout, &ia1siinx2, &rworkspace, &iworkspace, &iinfo, _state);
36831             for(tmpi=1; tmpi<=iin; tmpi++)
36832             {
36833                 work.ptr.p_double[n+tmpi] = ra1siinx2.ptr.pp_double[tmpi][1];
36834                 work.ptr.p_double[n+tmpi+iin] = ra1siinx2.ptr.pp_double[tmpi][2];
36835                 work.ptr.p_double[n+2*iin+tmpi] = ra4siin.ptr.p_double[tmpi];
36836                 w->ptr.p_double[*m+tmpi] = rworkspace.ptr.p_double[tmpi];
36837                 iblock->ptr.p_int[*m+tmpi] = iworkspace.ptr.p_int[tmpi];
36838                 iwork.ptr.p_int[tmpi] = ia1siinx2.ptr.pp_int[tmpi][1];
36839                 iwork.ptr.p_int[tmpi+iin] = ia1siinx2.ptr.pp_int[tmpi][2];
36840             }
36841 
36842             /*
36843              * Copy Eigenvalues Into W and IBLOCK
36844              * Use -JB for block number for unconverged eigenvalues.
36845              */
36846             for(j=1; j<=iout; j++)
36847             {
36848                 tmp1 = 0.5*(work.ptr.p_double[j+n]+work.ptr.p_double[j+iin+n]);
36849 
36850                 /*
36851                  * Flag non-convergence.
36852                  */
36853                 if( j>iout-iinfo )
36854                 {
36855                     ncnvrg = ae_true;
36856                     ib = -jb;
36857                 }
36858                 else
36859                 {
36860                     ib = jb;
36861                 }
36862                 for(je=iwork.ptr.p_int[j]+1+iwoff; je<=iwork.ptr.p_int[j+iin]+iwoff; je++)
36863                 {
36864                     w->ptr.p_double[je] = tmp1;
36865                     iblock->ptr.p_int[je] = ib;
36866                 }
36867             }
36868             *m = *m+im;
36869         }
36870     }
36871 
36872     /*
36873      * If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU
36874      * If NWL+1 < IL or NWU > IU, discard extra eigenvalues.
36875      */
36876     if( irange==3 )
36877     {
36878         im = 0;
36879         idiscl = il-1-nwl;
36880         idiscu = nwu-iu;
36881         if( idiscl>0||idiscu>0 )
36882         {
36883             for(je=1; je<=*m; je++)
36884             {
36885                 if( ae_fp_less_eq(w->ptr.p_double[je],wlu)&&idiscl>0 )
36886                 {
36887                     idiscl = idiscl-1;
36888                 }
36889                 else
36890                 {
36891                     if( ae_fp_greater_eq(w->ptr.p_double[je],wul)&&idiscu>0 )
36892                     {
36893                         idiscu = idiscu-1;
36894                     }
36895                     else
36896                     {
36897                         im = im+1;
36898                         w->ptr.p_double[im] = w->ptr.p_double[je];
36899                         iblock->ptr.p_int[im] = iblock->ptr.p_int[je];
36900                     }
36901                 }
36902             }
36903             *m = im;
36904         }
36905         if( idiscl>0||idiscu>0 )
36906         {
36907 
36908             /*
36909              * Code to deal with effects of bad arithmetic:
36910              * Some low eigenvalues to be discarded are not in (WL,WLU],
36911              * or high eigenvalues to be discarded are not in (WUL,WU]
36912              * so just kill off the smallest IDISCL/largest IDISCU
36913              * eigenvalues, by simply finding the smallest/largest
36914              * eigenvalue(s).
36915              *
36916              * (If N(w) is monotone non-decreasing, this should never
36917              *  happen.)
36918              */
36919             if( idiscl>0 )
36920             {
36921                 wkill = wu;
36922                 for(jdisc=1; jdisc<=idiscl; jdisc++)
36923                 {
36924                     iw = 0;
36925                     for(je=1; je<=*m; je++)
36926                     {
36927                         if( iblock->ptr.p_int[je]!=0&&(ae_fp_less(w->ptr.p_double[je],wkill)||iw==0) )
36928                         {
36929                             iw = je;
36930                             wkill = w->ptr.p_double[je];
36931                         }
36932                     }
36933                     iblock->ptr.p_int[iw] = 0;
36934                 }
36935             }
36936             if( idiscu>0 )
36937             {
36938                 wkill = wl;
36939                 for(jdisc=1; jdisc<=idiscu; jdisc++)
36940                 {
36941                     iw = 0;
36942                     for(je=1; je<=*m; je++)
36943                     {
36944                         if( iblock->ptr.p_int[je]!=0&&(ae_fp_greater(w->ptr.p_double[je],wkill)||iw==0) )
36945                         {
36946                             iw = je;
36947                             wkill = w->ptr.p_double[je];
36948                         }
36949                     }
36950                     iblock->ptr.p_int[iw] = 0;
36951                 }
36952             }
36953             im = 0;
36954             for(je=1; je<=*m; je++)
36955             {
36956                 if( iblock->ptr.p_int[je]!=0 )
36957                 {
36958                     im = im+1;
36959                     w->ptr.p_double[im] = w->ptr.p_double[je];
36960                     iblock->ptr.p_int[im] = iblock->ptr.p_int[je];
36961                 }
36962             }
36963             *m = im;
36964         }
36965         if( idiscl<0||idiscu<0 )
36966         {
36967             toofew = ae_true;
36968         }
36969     }
36970 
36971     /*
36972      * If ORDER='B', do nothing -- the eigenvalues are already sorted
36973      *    by block.
36974      * If ORDER='E', sort the eigenvalues from smallest to largest
36975      */
36976     if( iorder==1&&*nsplit>1 )
36977     {
36978         for(je=1; je<=*m-1; je++)
36979         {
36980             ie = 0;
36981             tmp1 = w->ptr.p_double[je];
36982             for(j=je+1; j<=*m; j++)
36983             {
36984                 if( ae_fp_less(w->ptr.p_double[j],tmp1) )
36985                 {
36986                     ie = j;
36987                     tmp1 = w->ptr.p_double[j];
36988                 }
36989             }
36990             if( ie!=0 )
36991             {
36992                 itmp1 = iblock->ptr.p_int[ie];
36993                 w->ptr.p_double[ie] = w->ptr.p_double[je];
36994                 iblock->ptr.p_int[ie] = iblock->ptr.p_int[je];
36995                 w->ptr.p_double[je] = tmp1;
36996                 iblock->ptr.p_int[je] = itmp1;
36997             }
36998         }
36999     }
37000     for(j=1; j<=*m; j++)
37001     {
37002         w->ptr.p_double[j] = w->ptr.p_double[j]*scalefactor;
37003     }
37004     *errorcode = 0;
37005     if( ncnvrg )
37006     {
37007         *errorcode = *errorcode+1;
37008     }
37009     if( toofew )
37010     {
37011         *errorcode = *errorcode+2;
37012     }
37013     result = *errorcode==0;
37014     ae_frame_leave(_state);
37015     return result;
37016 }
37017 
37018 
/*************************************************************************
Internal DSTEIN-style routine: computes eigenvectors of a real symmetric
tridiagonal matrix by inverse iteration.

The matrix is given by its diagonal D[1..N] and off-diagonal E[1..N-1].
The M wanted eigenvalues are supplied in W[1..M], grouped by submatrix
block: IBLOCK[j] is the (1-based) block index of eigenvalue j, and
ISPLIT holds the ending index of each block (presumably produced by the
preceding bisection routine in this file -- the same W/IBLOCK/ISPLIT
layout as LAPACK DSTEBZ; verify against callers).

On exit Z is an (1..N) x (1..M) matrix (1-based, allocated here) whose
j-th column is the eigenvector for W[j].  *info is set to the number of
eigenvectors that failed to converge, and their eigenvalue indices are
stored in IFAIL[1..*info]; negative *info values (-1, -4, -5, -6)
indicate invalid input parameters.

E and W are copied on entry (ae_vector_init_copy below), so the caller's
arrays are not modified even though the local copies are resized and,
for W, perturbed to separate close eigenvalues.
*************************************************************************/
static void evd_internaldstein(ae_int_t n,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t m,
     /* Real    */ ae_vector* w,
     /* Integer */ ae_vector* iblock,
     /* Integer */ ae_vector* isplit,
     /* Real    */ ae_matrix* z,
     /* Integer */ ae_vector* ifail,
     ae_int_t* info,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _e;
    ae_vector _w;
    ae_int_t maxits;
    ae_int_t extra;
    ae_int_t b1;
    ae_int_t blksiz;
    ae_int_t bn;
    ae_int_t gpind;
    ae_int_t i;
    ae_int_t iinfo;
    ae_int_t its;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t jblk;
    ae_int_t jmax;
    ae_int_t nblk;
    ae_int_t nrmchk;
    double dtpcrt;
    double eps;
    double eps1;
    double nrm;
    double onenrm;
    double ortol;
    double pertol;
    double scl;
    double sep;
    double tol;
    double xj;
    double xjm;
    double ztr;
    ae_vector work1;
    ae_vector work2;
    ae_vector work3;
    ae_vector work4;
    ae_vector work5;
    ae_vector iwork;
    ae_bool tmpcriterion;
    ae_int_t ti;
    ae_int_t i1;
    ae_int_t i2;
    double v;
    hqrndstate rs;

    ae_frame_make(_state, &_frame_block);
    memset(&_e, 0, sizeof(_e));
    memset(&_w, 0, sizeof(_w));
    memset(&work1, 0, sizeof(work1));
    memset(&work2, 0, sizeof(work2));
    memset(&work3, 0, sizeof(work3));
    memset(&work4, 0, sizeof(work4));
    memset(&work5, 0, sizeof(work5));
    memset(&iwork, 0, sizeof(iwork));
    memset(&rs, 0, sizeof(rs));
    ae_vector_init_copy(&_e, e, _state, ae_true);
    e = &_e;
    ae_vector_init_copy(&_w, w, _state, ae_true);
    w = &_w;
    ae_matrix_clear(z);
    ae_vector_clear(ifail);
    *info = 0;
    ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work3, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work4, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work5, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
    _hqrndstate_init(&rs, _state, ae_true);

    /*
     * Fixed RNG seed: the random starting vectors for inverse iteration
     * are reproducible, so results are deterministic run-to-run.
     */
    hqrndseed(346436, 2434, &rs, _state);
    maxits = 5;
    extra = 2;

    /*
     * Allocate 1-based workspaces (index 0 unused, hence the +1 sizes).
     */
    ae_vector_set_length(&work1, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&work2, ae_maxint(n-1, 1, _state)+1, _state);
    ae_vector_set_length(&work3, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&work4, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&work5, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(&iwork, ae_maxint(n, 1, _state)+1, _state);
    ae_vector_set_length(ifail, ae_maxint(m, 1, _state)+1, _state);
    ae_matrix_set_length(z, ae_maxint(n, 1, _state)+1, ae_maxint(m, 1, _state)+1, _state);

    /*
     * these initializers are not really necessary,
     * but without them compiler complains about uninitialized locals
     */
    gpind = 0;
    onenrm = (double)(0);
    ortol = (double)(0);
    dtpcrt = (double)(0);
    xjm = (double)(0);

    /*
     * Test the input parameters.
     */
    *info = 0;
    for(i=1; i<=m; i++)
    {
        ifail->ptr.p_int[i] = 0;
    }
    if( n<0 )
    {
        *info = -1;
        ae_frame_leave(_state);
        return;
    }
    if( m<0||m>n )
    {
        *info = -4;
        ae_frame_leave(_state);
        return;
    }
    for(j=2; j<=m; j++)
    {
        /*
         * Block indices must be non-decreasing, and within each block the
         * eigenvalues must be sorted in non-decreasing order.
         */
        if( iblock->ptr.p_int[j]<iblock->ptr.p_int[j-1] )
        {
            *info = -6;
            break;
        }
        if( iblock->ptr.p_int[j]==iblock->ptr.p_int[j-1]&&ae_fp_less(w->ptr.p_double[j],w->ptr.p_double[j-1]) )
        {
            *info = -5;
            break;
        }
    }
    if( *info!=0 )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Quick return if possible
     */
    if( n==0||m==0 )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        z->ptr.pp_double[1][1] = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Some preparations
     *
     * Re-pack the local copies of E and W into vectors of length N+1 so
     * that 1-based indexing up to N is always valid below (the caller's
     * vectors may be exactly-sized).  work1 is used as a bounce buffer.
     */
    ti = n-1;
    ae_v_move(&work1.ptr.p_double[1], 1, &e->ptr.p_double[1], 1, ae_v_len(1,ti));
    ae_vector_set_length(e, n+1, _state);
    ae_v_move(&e->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,ti));
    ae_v_move(&work1.ptr.p_double[1], 1, &w->ptr.p_double[1], 1, ae_v_len(1,m));
    ae_vector_set_length(w, n+1, _state);
    ae_v_move(&w->ptr.p_double[1], 1, &work1.ptr.p_double[1], 1, ae_v_len(1,m));

    /*
     * Get machine constants.
     */
    eps = ae_machineepsilon;

    /*
     * Compute eigenvectors of matrix blocks.
     */
    j1 = 1;
    for(nblk=1; nblk<=iblock->ptr.p_int[m]; nblk++)
    {

        /*
         * Find starting and ending indices of block nblk.
         */
        if( nblk==1 )
        {
            b1 = 1;
        }
        else
        {
            b1 = isplit->ptr.p_int[nblk-1]+1;
        }
        bn = isplit->ptr.p_int[nblk];
        blksiz = bn-b1+1;
        if( blksiz!=1 )
        {

            /*
             * Compute reorthogonalization criterion and stopping criterion.
             *
             * onenrm is the infinity norm of the block; ortol decides when
             * neighboring eigenvalues are "close" enough to require
             * Gram-Schmidt reorthogonalization, dtpcrt is the threshold on
             * the iterate's infinity norm used as a convergence test.
             */
            gpind = b1;
            onenrm = ae_fabs(d->ptr.p_double[b1], _state)+ae_fabs(e->ptr.p_double[b1], _state);
            onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[bn], _state)+ae_fabs(e->ptr.p_double[bn-1], _state), _state);
            for(i=b1+1; i<=bn-1; i++)
            {
                onenrm = ae_maxreal(onenrm, ae_fabs(d->ptr.p_double[i], _state)+ae_fabs(e->ptr.p_double[i-1], _state)+ae_fabs(e->ptr.p_double[i], _state), _state);
            }
            ortol = 0.001*onenrm;
            dtpcrt = ae_sqrt(0.1/blksiz, _state);
        }

        /*
         * Loop through eigenvalues of block nblk.
         */
        jblk = 0;
        for(j=j1; j<=m; j++)
        {
            if( iblock->ptr.p_int[j]!=nblk )
            {
                /*
                 * First eigenvalue of the next block: remember where it
                 * starts and move on to the next nblk iteration.
                 */
                j1 = j;
                break;
            }
            jblk = jblk+1;
            xj = w->ptr.p_double[j];
            if( blksiz==1 )
            {

                /*
                 * Skip all the work if the block size is one.
                 */
                work1.ptr.p_double[1] = (double)(1);
            }
            else
            {

                /*
                 * If eigenvalues j and j-1 are too close, add a relatively
                 * small perturbation.
                 */
                if( jblk>1 )
                {
                    eps1 = ae_fabs(eps*xj, _state);
                    pertol = 10*eps1;
                    sep = xj-xjm;
                    if( ae_fp_less(sep,pertol) )
                    {
                        xj = xjm+pertol;
                    }
                }
                its = 0;
                nrmchk = 0;

                /*
                 * Get random starting vector.
                 */
                for(ti=1; ti<=blksiz; ti++)
                {
                    /*
                     * Uniform components in [-1,1).
                     */
                    work1.ptr.p_double[ti] = 2*hqrnduniformr(&rs, _state)-1;
                }

                /*
                 * Copy the matrix T so it won't be destroyed in factorization.
                 */
                for(ti=1; ti<=blksiz-1; ti++)
                {
                    work2.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1];
                    work3.ptr.p_double[ti] = e->ptr.p_double[b1+ti-1];
                    work4.ptr.p_double[ti] = d->ptr.p_double[b1+ti-1];
                }
                work4.ptr.p_double[blksiz] = d->ptr.p_double[b1+blksiz-1];

                /*
                 * Compute LU factors with partial pivoting  ( PT = LU )
                 *
                 * The factorization of T - xj*I is computed once and reused
                 * by every solve inside the iteration loop below.
                 */
                tol = (double)(0);
                evd_tdininternaldlagtf(blksiz, &work4, xj, &work2, &work3, tol, &work5, &iwork, &iinfo, _state);

                /*
                 * Update iteration count.
                 */
                do
                {
                    its = its+1;
                    if( its>maxits )
                    {

                        /*
                         * If stopping criterion was not satisfied, update info and
                         * store eigenvector number in array ifail.
                         */
                        *info = *info+1;
                        ifail->ptr.p_int[*info] = j;
                        break;
                    }

                    /*
                     * Normalize and scale the righthand side vector Pb.
                     */
                    v = (double)(0);
                    for(ti=1; ti<=blksiz; ti++)
                    {
                        v = v+ae_fabs(work1.ptr.p_double[ti], _state);
                    }
                    scl = blksiz*onenrm*ae_maxreal(eps, ae_fabs(work4.ptr.p_double[blksiz], _state), _state)/v;
                    ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl);

                    /*
                     * Solve the system LU = Pb.
                     */
                    evd_tdininternaldlagts(blksiz, &work4, &work2, &work3, &work5, &iwork, &work1, &tol, &iinfo, _state);

                    /*
                     * Reorthogonalize by modified Gram-Schmidt if eigenvalues are
                     * close enough.
                     */
                    if( jblk!=1 )
                    {
                        if( ae_fp_greater(ae_fabs(xj-xjm, _state),ortol) )
                        {
                            /*
                             * Well-separated from the previous eigenvalue:
                             * start a new orthogonalization group at j.
                             */
                            gpind = j;
                        }
                        if( gpind!=j )
                        {
                            for(i=gpind; i<=j-1; i++)
                            {
                                /*
                                 * Subtract the projection of the iterate onto the
                                 * already-computed eigenvector in column i of Z.
                                 */
                                i1 = b1;
                                i2 = b1+blksiz-1;
                                ztr = ae_v_dotproduct(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz));
                                ae_v_subd(&work1.ptr.p_double[1], 1, &z->ptr.pp_double[i1][i], z->stride, ae_v_len(1,blksiz), ztr);
                                /*
                                 * NOTE(review): touchint() presumably just "uses" I2 to
                                 * suppress unused-variable warnings -- confirm.
                                 */
                                touchint(&i2, _state);
                            }
                        }
                    }

                    /*
                     * Check the infinity norm of the iterate.
                     */
                    jmax = vectoridxabsmax(&work1, 1, blksiz, _state);
                    nrm = ae_fabs(work1.ptr.p_double[jmax], _state);

                    /*
                     * Continue for additional iterations after norm reaches
                     * stopping criterion.
                     *
                     * tmpcriterion==true means "iterate again": either the norm
                     * is still below the threshold, or we have not yet done
                     * EXTRA confirming iterations past it.
                     */
                    tmpcriterion = ae_false;
                    if( ae_fp_less(nrm,dtpcrt) )
                    {
                        tmpcriterion = ae_true;
                    }
                    else
                    {
                        nrmchk = nrmchk+1;
                        if( nrmchk<extra+1 )
                        {
                            tmpcriterion = ae_true;
                        }
                    }
                }
                while(tmpcriterion);

                /*
                 * Accept iterate as jth eigenvector.
                 *
                 * Normalize to unit 2-norm and fix the sign so the largest
                 * component is non-negative.
                 */
                scl = 1/vectornorm2(&work1, 1, blksiz, _state);
                jmax = vectoridxabsmax(&work1, 1, blksiz, _state);
                if( ae_fp_less(work1.ptr.p_double[jmax],(double)(0)) )
                {
                    scl = -scl;
                }
                ae_v_muld(&work1.ptr.p_double[1], 1, ae_v_len(1,blksiz), scl);
            }
            /*
             * Scatter the block-local eigenvector into column j of Z,
             * zero-padding rows outside the block.
             */
            for(i=1; i<=n; i++)
            {
                z->ptr.pp_double[i][j] = (double)(0);
            }
            for(i=1; i<=blksiz; i++)
            {
                z->ptr.pp_double[b1+i-1][j] = work1.ptr.p_double[i];
            }

            /*
             * Save the shift to check eigenvalue spacing at next
             * iteration.
             */
            xjm = xj;
        }
    }
    ae_frame_leave(_state);
}
37407 
37408 
evd_tdininternaldlagtf(ae_int_t n,ae_vector * a,double lambdav,ae_vector * b,ae_vector * c,double tol,ae_vector * d,ae_vector * iin,ae_int_t * info,ae_state * _state)37409 static void evd_tdininternaldlagtf(ae_int_t n,
37410      /* Real    */ ae_vector* a,
37411      double lambdav,
37412      /* Real    */ ae_vector* b,
37413      /* Real    */ ae_vector* c,
37414      double tol,
37415      /* Real    */ ae_vector* d,
37416      /* Integer */ ae_vector* iin,
37417      ae_int_t* info,
37418      ae_state *_state)
37419 {
37420     ae_int_t k;
37421     double eps;
37422     double mult;
37423     double piv1;
37424     double piv2;
37425     double scale1;
37426     double scale2;
37427     double temp;
37428     double tl;
37429 
37430     *info = 0;
37431 
37432     *info = 0;
37433     if( n<0 )
37434     {
37435         *info = -1;
37436         return;
37437     }
37438     if( n==0 )
37439     {
37440         return;
37441     }
37442     a->ptr.p_double[1] = a->ptr.p_double[1]-lambdav;
37443     iin->ptr.p_int[n] = 0;
37444     if( n==1 )
37445     {
37446         if( ae_fp_eq(a->ptr.p_double[1],(double)(0)) )
37447         {
37448             iin->ptr.p_int[1] = 1;
37449         }
37450         return;
37451     }
37452     eps = ae_machineepsilon;
37453     tl = ae_maxreal(tol, eps, _state);
37454     scale1 = ae_fabs(a->ptr.p_double[1], _state)+ae_fabs(b->ptr.p_double[1], _state);
37455     for(k=1; k<=n-1; k++)
37456     {
37457         a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-lambdav;
37458         scale2 = ae_fabs(c->ptr.p_double[k], _state)+ae_fabs(a->ptr.p_double[k+1], _state);
37459         if( k<n-1 )
37460         {
37461             scale2 = scale2+ae_fabs(b->ptr.p_double[k+1], _state);
37462         }
37463         if( ae_fp_eq(a->ptr.p_double[k],(double)(0)) )
37464         {
37465             piv1 = (double)(0);
37466         }
37467         else
37468         {
37469             piv1 = ae_fabs(a->ptr.p_double[k], _state)/scale1;
37470         }
37471         if( ae_fp_eq(c->ptr.p_double[k],(double)(0)) )
37472         {
37473             iin->ptr.p_int[k] = 0;
37474             piv2 = (double)(0);
37475             scale1 = scale2;
37476             if( k<n-1 )
37477             {
37478                 d->ptr.p_double[k] = (double)(0);
37479             }
37480         }
37481         else
37482         {
37483             piv2 = ae_fabs(c->ptr.p_double[k], _state)/scale2;
37484             if( ae_fp_less_eq(piv2,piv1) )
37485             {
37486                 iin->ptr.p_int[k] = 0;
37487                 scale1 = scale2;
37488                 c->ptr.p_double[k] = c->ptr.p_double[k]/a->ptr.p_double[k];
37489                 a->ptr.p_double[k+1] = a->ptr.p_double[k+1]-c->ptr.p_double[k]*b->ptr.p_double[k];
37490                 if( k<n-1 )
37491                 {
37492                     d->ptr.p_double[k] = (double)(0);
37493                 }
37494             }
37495             else
37496             {
37497                 iin->ptr.p_int[k] = 1;
37498                 mult = a->ptr.p_double[k]/c->ptr.p_double[k];
37499                 a->ptr.p_double[k] = c->ptr.p_double[k];
37500                 temp = a->ptr.p_double[k+1];
37501                 a->ptr.p_double[k+1] = b->ptr.p_double[k]-mult*temp;
37502                 if( k<n-1 )
37503                 {
37504                     d->ptr.p_double[k] = b->ptr.p_double[k+1];
37505                     b->ptr.p_double[k+1] = -mult*d->ptr.p_double[k];
37506                 }
37507                 b->ptr.p_double[k] = temp;
37508                 c->ptr.p_double[k] = mult;
37509             }
37510         }
37511         if( ae_fp_less_eq(ae_maxreal(piv1, piv2, _state),tl)&&iin->ptr.p_int[n]==0 )
37512         {
37513             iin->ptr.p_int[n] = k;
37514         }
37515     }
37516     if( ae_fp_less_eq(ae_fabs(a->ptr.p_double[n], _state),scale1*tl)&&iin->ptr.p_int[n]==0 )
37517     {
37518         iin->ptr.p_int[n] = n;
37519     }
37520 }
37521 
37522 
/*************************************************************************
Internal subroutine: solves a (possibly perturbed) shifted tridiagonal
system using a previously computed LU-type factorization.

Based on its name and structure this mirrors the LAPACK routine DLAGTS
(solve stage paired with a DLAGTF-style factorization) -- TODO confirm
against the LAPACK sources.

All arrays are indexed starting from 1.

Input parameters:
    N       -   order of the system; Info=-1 is returned for N<0,
                nothing is done for N=0.
    A       -   diagonal of the factorized matrix (pivot values).
    B, D    -   first and second superdiagonals of the factorization.
    C       -   multipliers produced during elimination.
    IIN     -   integer interchange flags from the factorization:
                IIN[k]=0 means no row swap was made at step k,
                nonzero means rows k and k+1 were interchanged.
    Y       -   right-hand side on entry; overwritten by the solution.
    Tol     -   perturbation tolerance. If Tol<=0 on entry, it is
                replaced by a default value derived from the array
                norms and machine epsilon (and Y is then computed for
                a slightly perturbed matrix to avoid overflow).

Output parameters:
    Y       -   solution of the system.
    Info    -   0 on success, -1 if N<0.
*************************************************************************/
static void evd_tdininternaldlagts(ae_int_t n,
     /* Real    */ ae_vector* a,
     /* Real    */ ae_vector* b,
     /* Real    */ ae_vector* c,
     /* Real    */ ae_vector* d,
     /* Integer */ ae_vector* iin,
     /* Real    */ ae_vector* y,
     double* tol,
     ae_int_t* info,
     ae_state *_state)
{
    ae_int_t k;
    double absak;
    double ak;
    double bignum;
    double eps;
    double pert;
    double sfmin;
    double temp;

    /* Output parameter is cleared twice: once by the generated
     * prologue, once by the translated algorithm body. Harmless. */
    *info = 0;

    *info = 0;
    if( n<0 )
    {
        *info = -1;
        return;
    }
    if( n==0 )
    {
        return;
    }

    /* Machine constants used to detect/avoid overflow in the back
     * substitution below. */
    eps = ae_machineepsilon;
    sfmin = ae_minrealnumber;
    bignum = 1/sfmin;

    /*
     * Non-positive Tol requests the default tolerance:
     * eps * max over all factorization entries (A, B, D), floored at eps.
     */
    if( ae_fp_less_eq(*tol,(double)(0)) )
    {
        *tol = ae_fabs(a->ptr.p_double[1], _state);
        if( n>1 )
        {
            *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[2], _state), ae_fabs(b->ptr.p_double[1], _state), _state), _state);
        }
        for(k=3; k<=n; k++)
        {
            *tol = ae_maxreal(*tol, ae_maxreal(ae_fabs(a->ptr.p_double[k], _state), ae_maxreal(ae_fabs(b->ptr.p_double[k-1], _state), ae_fabs(d->ptr.p_double[k-2], _state), _state), _state), _state);
        }
        *tol = *tol*eps;
        if( ae_fp_eq(*tol,(double)(0)) )
        {
            *tol = eps;
        }
    }

    /*
     * Forward elimination: apply the recorded multipliers C and the
     * row interchanges (IIN flags) to the right-hand side Y.
     */
    for(k=2; k<=n; k++)
    {
        if( iin->ptr.p_int[k-1]==0 )
        {
            y->ptr.p_double[k] = y->ptr.p_double[k]-c->ptr.p_double[k-1]*y->ptr.p_double[k-1];
        }
        else
        {
            /* rows k-1 and k were swapped during factorization */
            temp = y->ptr.p_double[k-1];
            y->ptr.p_double[k-1] = y->ptr.p_double[k];
            y->ptr.p_double[k] = temp-c->ptr.p_double[k-1]*y->ptr.p_double[k];
        }
    }

    /*
     * Back substitution with the upper triangular factor (diagonal A,
     * superdiagonals B and D). Pivots that are too small relative to
     * the right-hand side are iteratively perturbed by +-Tol (doubling
     * the perturbation each retry) so that the division cannot overflow.
     */
    for(k=n; k>=1; k--)
    {
        if( k<=n-2 )
        {
            temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1]-d->ptr.p_double[k]*y->ptr.p_double[k+2];
        }
        else
        {
            if( k==n-1 )
            {
                temp = y->ptr.p_double[k]-b->ptr.p_double[k]*y->ptr.p_double[k+1];
            }
            else
            {
                temp = y->ptr.p_double[k];
            }
        }
        ak = a->ptr.p_double[k];

        /* perturbation carries the sign of the pivot */
        pert = ae_fabs(*tol, _state);
        if( ae_fp_less(ak,(double)(0)) )
        {
            pert = -pert;
        }
        for(;;)
        {
            absak = ae_fabs(ak, _state);
            if( ae_fp_less(absak,(double)(1)) )
            {
                if( ae_fp_less(absak,sfmin) )
                {
                    if( ae_fp_eq(absak,(double)(0))||ae_fp_greater(ae_fabs(temp, _state)*sfmin,absak) )
                    {
                        /* pivot is zero or Temp/AK would overflow:
                         * grow the perturbation and retry */
                        ak = ak+pert;
                        pert = 2*pert;
                        continue;
                    }
                    else
                    {
                        /* rescale both numerator and denominator
                         * instead of perturbing */
                        temp = temp*bignum;
                        ak = ak*bignum;
                    }
                }
                else
                {
                    if( ae_fp_greater(ae_fabs(temp, _state),absak*bignum) )
                    {
                        /* quotient would overflow: perturb and retry */
                        ak = ak+pert;
                        pert = 2*pert;
                        continue;
                    }
                }
            }
            break;
        }
        y->ptr.p_double[k] = temp/ak;
    }
}
37645 
37646 
/*************************************************************************
Internal subroutine: bisection driver for counting/locating eigenvalues
of a symmetric tridiagonal matrix via Sturm sequences.

Based on its name and structure this mirrors the LAPACK routine DLAEBZ
-- TODO confirm against the LAPACK sources.

The matrix is defined by its diagonal D[1..N] and the SQUARES of its
off-diagonal elements E2[1..N-1]. Parameter E is accepted for signature
compatibility but is not referenced in this routine; WORK and IWORK are
likewise unused in this serial version.

IJOB selects the operation:
    IJOB=1  -   for each of the MinP initial intervals AB[ji][1..2],
                compute the Sturm counts NAB[ji][1..2] (number of
                eigenvalues less than each endpoint); MOut returns the
                total number of eigenvalues inside the intervals.
    IJOB=2  -   refine all intervals by repeated bisection (midpoints
                are initialized here), splitting intervals whose halves
                both contain eigenvalues; converged intervals are moved
                to the front of the queue.
    IJOB=3  -   binary search: shrink each interval to the part
                containing the point w with N(w)=NVal[ji].

Other parameters:
    NItMax  -   maximum number of bisection iterations.
    MMax    -   capacity of the interval queue AB/NAB; if a split would
                exceed it, Info=MMax+1 is returned.
    AbsTol,
    RelTol  -   absolute/relative convergence thresholds on interval width.
    PivMin  -   minimum absolute pivot allowed in the Sturm recurrence
                (guards against division by ~0).
    C       -   interval midpoints (input for IJOB=3, workspace otherwise).

Output:
    MOut    -   IJOB=1: eigenvalue count; IJOB=2,3: number of intervals KL.
    Info    -   0 on success, -1 for invalid IJOB, MMax+1 on queue
                overflow, otherwise the number of unconverged intervals.
*************************************************************************/
static void evd_internaldlaebz(ae_int_t ijob,
     ae_int_t nitmax,
     ae_int_t n,
     ae_int_t mmax,
     ae_int_t minp,
     double abstol,
     double reltol,
     double pivmin,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     /* Real    */ ae_vector* e2,
     /* Integer */ ae_vector* nval,
     /* Real    */ ae_matrix* ab,
     /* Real    */ ae_vector* c,
     ae_int_t* mout,
     /* Integer */ ae_matrix* nab,
     /* Real    */ ae_vector* work,
     /* Integer */ ae_vector* iwork,
     ae_int_t* info,
     ae_state *_state)
{
    ae_int_t itmp1;
    ae_int_t itmp2;
    ae_int_t j;
    ae_int_t ji;
    ae_int_t jit;
    ae_int_t jp;
    ae_int_t kf;
    ae_int_t kfnew;
    ae_int_t kl;
    ae_int_t klnew;
    double tmp1;
    double tmp2;

    /* Outputs are cleared twice: once by the generated prologue, once
     * by the translated algorithm body. Harmless. */
    *mout = 0;
    *info = 0;

    *info = 0;
    if( ijob<1||ijob>3 )
    {
        *info = -1;
        return;
    }

    /*
     * Initialize NAB
     */
    if( ijob==1 )
    {

        /*
         * Compute the number of eigenvalues in the initial intervals.
         */
        *mout = 0;

        /*
         *DIR$ NOVECTOR
         */
        for(ji=1; ji<=minp; ji++)
        {
            for(jp=1; jp<=2; jp++)
            {
                /* Sturm sequence at endpoint AB[ji][jp]: count sign
                 * changes (non-positive pivots) of the recurrence */
                tmp1 = d->ptr.p_double[1]-ab->ptr.pp_double[ji][jp];
                if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) )
                {
                    tmp1 = -pivmin;
                }
                nab->ptr.pp_int[ji][jp] = 0;
                if( ae_fp_less_eq(tmp1,(double)(0)) )
                {
                    nab->ptr.pp_int[ji][jp] = 1;
                }
                for(j=2; j<=n; j++)
                {
                    tmp1 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp1-ab->ptr.pp_double[ji][jp];
                    if( ae_fp_less(ae_fabs(tmp1, _state),pivmin) )
                    {
                        tmp1 = -pivmin;
                    }
                    if( ae_fp_less_eq(tmp1,(double)(0)) )
                    {
                        nab->ptr.pp_int[ji][jp] = nab->ptr.pp_int[ji][jp]+1;
                    }
                }
            }
            *mout = *mout+nab->ptr.pp_int[ji][2]-nab->ptr.pp_int[ji][1];
        }
        return;
    }

    /*
     * Initialize for loop
     *
     * KF and KL have the following meaning:
     *   Intervals 1,...,KF-1 have converged.
     *   Intervals KF,...,KL  still need to be refined.
     */
    kf = 1;
    kl = minp;

    /*
     * If IJOB=2, initialize C.
     * If IJOB=3, use the user-supplied starting point.
     */
    if( ijob==2 )
    {
        for(ji=1; ji<=minp; ji++)
        {
            c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]);
        }
    }

    /*
     * Iteration loop
     */
    for(jit=1; jit<=nitmax; jit++)
    {

        /*
         * Loop over intervals
         *
         *
         * Serial Version of the loop
         */
        klnew = kl;
        for(ji=kf; ji<=kl; ji++)
        {

            /*
             * Compute N(w), the number of eigenvalues less than w
             */
            tmp1 = c->ptr.p_double[ji];
            tmp2 = d->ptr.p_double[1]-tmp1;
            itmp1 = 0;
            if( ae_fp_less_eq(tmp2,pivmin) )
            {
                itmp1 = 1;
                tmp2 = ae_minreal(tmp2, -pivmin, _state);
            }

            /*
             * A series of compiler directives to defeat vectorization
             * for the next loop
             *
             **$PL$ CMCHAR=' '
             *CDIR$          NEXTSCALAR
             *C$DIR          SCALAR
             *CDIR$          NEXT SCALAR
             *CVD$L          NOVECTOR
             *CDEC$          NOVECTOR
             *CVD$           NOVECTOR
             **VDIR          NOVECTOR
             **VOCL          LOOP,SCALAR
             *CIBM           PREFER SCALAR
             **$PL$ CMCHAR='*'
             */
            for(j=2; j<=n; j++)
            {
                tmp2 = d->ptr.p_double[j]-e2->ptr.p_double[j-1]/tmp2-tmp1;
                if( ae_fp_less_eq(tmp2,pivmin) )
                {
                    itmp1 = itmp1+1;
                    tmp2 = ae_minreal(tmp2, -pivmin, _state);
                }
            }
            if( ijob<=2 )
            {

                /*
                 * IJOB=2: Choose all intervals containing eigenvalues.
                 *
                 * Insure that N(w) is monotone
                 */
                itmp1 = ae_minint(nab->ptr.pp_int[ji][2], ae_maxint(nab->ptr.pp_int[ji][1], itmp1, _state), _state);

                /*
                 * Update the Queue -- add intervals if both halves
                 * contain eigenvalues.
                 */
                if( itmp1==nab->ptr.pp_int[ji][2] )
                {

                    /*
                     * No eigenvalue in the upper interval:
                     * just use the lower interval.
                     */
                    ab->ptr.pp_double[ji][2] = tmp1;
                }
                else
                {
                    if( itmp1==nab->ptr.pp_int[ji][1] )
                    {

                        /*
                         * No eigenvalue in the lower interval:
                         * just use the upper interval.
                         */
                        ab->ptr.pp_double[ji][1] = tmp1;
                    }
                    else
                    {
                        if( klnew<mmax )
                        {

                            /*
                             * Eigenvalue in both intervals -- add upper to queue.
                             */
                            klnew = klnew+1;
                            ab->ptr.pp_double[klnew][2] = ab->ptr.pp_double[ji][2];
                            nab->ptr.pp_int[klnew][2] = nab->ptr.pp_int[ji][2];
                            ab->ptr.pp_double[klnew][1] = tmp1;
                            nab->ptr.pp_int[klnew][1] = itmp1;
                            ab->ptr.pp_double[ji][2] = tmp1;
                            nab->ptr.pp_int[ji][2] = itmp1;
                        }
                        else
                        {
                            /* interval queue overflow */
                            *info = mmax+1;
                            return;
                        }
                    }
                }
            }
            else
            {

                /*
                 * IJOB=3: Binary search.  Keep only the interval
                 * containing  w  s.t. N(w) = NVAL
                 */
                if( itmp1<=nval->ptr.p_int[ji] )
                {
                    ab->ptr.pp_double[ji][1] = tmp1;
                    nab->ptr.pp_int[ji][1] = itmp1;
                }
                if( itmp1>=nval->ptr.p_int[ji] )
                {
                    ab->ptr.pp_double[ji][2] = tmp1;
                    nab->ptr.pp_int[ji][2] = itmp1;
                }
            }
        }
        kl = klnew;

        /*
         * Check for convergence
         */
        kfnew = kf;
        for(ji=kf; ji<=kl; ji++)
        {
            /* converged when interval width is below the (abs/rel/pivot)
             * threshold or the endpoint counts coincide */
            tmp1 = ae_fabs(ab->ptr.pp_double[ji][2]-ab->ptr.pp_double[ji][1], _state);
            tmp2 = ae_maxreal(ae_fabs(ab->ptr.pp_double[ji][2], _state), ae_fabs(ab->ptr.pp_double[ji][1], _state), _state);
            if( ae_fp_less(tmp1,ae_maxreal(abstol, ae_maxreal(pivmin, reltol*tmp2, _state), _state))||nab->ptr.pp_int[ji][1]>=nab->ptr.pp_int[ji][2] )
            {

                /*
                 * Converged -- Swap with position KFNEW,
                 * then increment KFNEW
                 */
                if( ji>kfnew )
                {
                    tmp1 = ab->ptr.pp_double[ji][1];
                    tmp2 = ab->ptr.pp_double[ji][2];
                    itmp1 = nab->ptr.pp_int[ji][1];
                    itmp2 = nab->ptr.pp_int[ji][2];
                    ab->ptr.pp_double[ji][1] = ab->ptr.pp_double[kfnew][1];
                    ab->ptr.pp_double[ji][2] = ab->ptr.pp_double[kfnew][2];
                    nab->ptr.pp_int[ji][1] = nab->ptr.pp_int[kfnew][1];
                    nab->ptr.pp_int[ji][2] = nab->ptr.pp_int[kfnew][2];
                    ab->ptr.pp_double[kfnew][1] = tmp1;
                    ab->ptr.pp_double[kfnew][2] = tmp2;
                    nab->ptr.pp_int[kfnew][1] = itmp1;
                    nab->ptr.pp_int[kfnew][2] = itmp2;
                    if( ijob==3 )
                    {
                        /* keep the requested counts aligned with their intervals */
                        itmp1 = nval->ptr.p_int[ji];
                        nval->ptr.p_int[ji] = nval->ptr.p_int[kfnew];
                        nval->ptr.p_int[kfnew] = itmp1;
                    }
                }
                kfnew = kfnew+1;
            }
        }
        kf = kfnew;

        /*
         * Choose Midpoints
         */
        for(ji=kf; ji<=kl; ji++)
        {
            c->ptr.p_double[ji] = 0.5*(ab->ptr.pp_double[ji][1]+ab->ptr.pp_double[ji][2]);
        }

        /*
         * If no more intervals to refine, quit.
         */
        if( kf>kl )
        {
            break;
        }
    }

    /*
     * Converged
     */
    *info = ae_maxint(kl+1-kf, 0, _state);
    *mout = kl;
}
37955 
37956 
37957 /*************************************************************************
37958 Internal subroutine
37959 
37960   -- LAPACK routine (version 3.0) --
37961      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
37962      Courant Institute, Argonne National Lab, and Rice University
37963      June 30, 1999
37964 *************************************************************************/
evd_rmatrixinternaltrevc(ae_matrix * t,ae_int_t n,ae_int_t side,ae_int_t howmny,ae_vector * vselect,ae_matrix * vl,ae_matrix * vr,ae_int_t * m,ae_int_t * info,ae_state * _state)37965 static void evd_rmatrixinternaltrevc(/* Real    */ ae_matrix* t,
37966      ae_int_t n,
37967      ae_int_t side,
37968      ae_int_t howmny,
37969      /* Boolean */ ae_vector* vselect,
37970      /* Real    */ ae_matrix* vl,
37971      /* Real    */ ae_matrix* vr,
37972      ae_int_t* m,
37973      ae_int_t* info,
37974      ae_state *_state)
37975 {
37976     ae_frame _frame_block;
37977     ae_vector _vselect;
37978     ae_int_t i;
37979     ae_int_t j;
37980     ae_matrix t1;
37981     ae_matrix vl1;
37982     ae_matrix vr1;
37983     ae_vector vselect1;
37984 
37985     ae_frame_make(_state, &_frame_block);
37986     memset(&_vselect, 0, sizeof(_vselect));
37987     memset(&t1, 0, sizeof(t1));
37988     memset(&vl1, 0, sizeof(vl1));
37989     memset(&vr1, 0, sizeof(vr1));
37990     memset(&vselect1, 0, sizeof(vselect1));
37991     ae_vector_init_copy(&_vselect, vselect, _state, ae_true);
37992     vselect = &_vselect;
37993     *m = 0;
37994     *info = 0;
37995     ae_matrix_init(&t1, 0, 0, DT_REAL, _state, ae_true);
37996     ae_matrix_init(&vl1, 0, 0, DT_REAL, _state, ae_true);
37997     ae_matrix_init(&vr1, 0, 0, DT_REAL, _state, ae_true);
37998     ae_vector_init(&vselect1, 0, DT_BOOL, _state, ae_true);
37999 
38000 
38001     /*
38002      * Allocate VL/VR, if needed
38003      */
38004     if( howmny==2||howmny==3 )
38005     {
38006         if( side==1||side==3 )
38007         {
38008             rmatrixsetlengthatleast(vr, n, n, _state);
38009         }
38010         if( side==2||side==3 )
38011         {
38012             rmatrixsetlengthatleast(vl, n, n, _state);
38013         }
38014     }
38015 
38016     /*
38017      * Try to use MKL kernel
38018      */
38019     if( rmatrixinternaltrevcmkl(t, n, side, howmny, vl, vr, m, info, _state) )
38020     {
38021         ae_frame_leave(_state);
38022         return;
38023     }
38024 
38025     /*
38026      * ALGLIB version
38027      */
38028     ae_matrix_set_length(&t1, n+1, n+1, _state);
38029     for(i=0; i<=n-1; i++)
38030     {
38031         for(j=0; j<=n-1; j++)
38032         {
38033             t1.ptr.pp_double[i+1][j+1] = t->ptr.pp_double[i][j];
38034         }
38035     }
38036     if( howmny==3 )
38037     {
38038         ae_vector_set_length(&vselect1, n+1, _state);
38039         for(i=0; i<=n-1; i++)
38040         {
38041             vselect1.ptr.p_bool[1+i] = vselect->ptr.p_bool[i];
38042         }
38043     }
38044     if( (side==2||side==3)&&howmny==1 )
38045     {
38046         ae_matrix_set_length(&vl1, n+1, n+1, _state);
38047         for(i=0; i<=n-1; i++)
38048         {
38049             for(j=0; j<=n-1; j++)
38050             {
38051                 vl1.ptr.pp_double[i+1][j+1] = vl->ptr.pp_double[i][j];
38052             }
38053         }
38054     }
38055     if( (side==1||side==3)&&howmny==1 )
38056     {
38057         ae_matrix_set_length(&vr1, n+1, n+1, _state);
38058         for(i=0; i<=n-1; i++)
38059         {
38060             for(j=0; j<=n-1; j++)
38061             {
38062                 vr1.ptr.pp_double[i+1][j+1] = vr->ptr.pp_double[i][j];
38063             }
38064         }
38065     }
38066     evd_internaltrevc(&t1, n, side, howmny, &vselect1, &vl1, &vr1, m, info, _state);
38067     if( side!=1 )
38068     {
38069         rmatrixsetlengthatleast(vl, n, n, _state);
38070         for(i=0; i<=n-1; i++)
38071         {
38072             for(j=0; j<=n-1; j++)
38073             {
38074                 vl->ptr.pp_double[i][j] = vl1.ptr.pp_double[i+1][j+1];
38075             }
38076         }
38077     }
38078     if( side!=2 )
38079     {
38080         rmatrixsetlengthatleast(vr, n, n, _state);
38081         for(i=0; i<=n-1; i++)
38082         {
38083             for(j=0; j<=n-1; j++)
38084             {
38085                 vr->ptr.pp_double[i][j] = vr1.ptr.pp_double[i+1][j+1];
38086             }
38087         }
38088     }
38089     ae_frame_leave(_state);
38090 }
38091 
38092 
38093 /*************************************************************************
38094 Internal subroutine
38095 
38096   -- LAPACK routine (version 3.0) --
38097      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
38098      Courant Institute, Argonne National Lab, and Rice University
38099      June 30, 1999
38100 *************************************************************************/
evd_internaltrevc(ae_matrix * t,ae_int_t n,ae_int_t side,ae_int_t howmny,ae_vector * vselect,ae_matrix * vl,ae_matrix * vr,ae_int_t * m,ae_int_t * info,ae_state * _state)38101 static void evd_internaltrevc(/* Real    */ ae_matrix* t,
38102      ae_int_t n,
38103      ae_int_t side,
38104      ae_int_t howmny,
38105      /* Boolean */ ae_vector* vselect,
38106      /* Real    */ ae_matrix* vl,
38107      /* Real    */ ae_matrix* vr,
38108      ae_int_t* m,
38109      ae_int_t* info,
38110      ae_state *_state)
38111 {
38112     ae_frame _frame_block;
38113     ae_vector _vselect;
38114     ae_bool allv;
38115     ae_bool bothv;
38116     ae_bool leftv;
38117     ae_bool over;
38118     ae_bool pair;
38119     ae_bool rightv;
38120     ae_bool somev;
38121     ae_int_t i;
38122     ae_int_t ierr;
38123     ae_int_t ii;
38124     ae_int_t ip;
38125     ae_int_t iis;
38126     ae_int_t j;
38127     ae_int_t j1;
38128     ae_int_t j2;
38129     ae_int_t jnxt;
38130     ae_int_t k;
38131     ae_int_t ki;
38132     ae_int_t n2;
38133     double beta;
38134     double bignum;
38135     double emax;
38136     double rec;
38137     double remax;
38138     double scl;
38139     double smin;
38140     double smlnum;
38141     double ulp;
38142     double unfl;
38143     double vcrit;
38144     double vmax;
38145     double wi;
38146     double wr;
38147     double xnorm;
38148     ae_matrix x;
38149     ae_vector work;
38150     ae_vector temp;
38151     ae_matrix temp11;
38152     ae_matrix temp22;
38153     ae_matrix temp11b;
38154     ae_matrix temp21b;
38155     ae_matrix temp12b;
38156     ae_matrix temp22b;
38157     ae_bool skipflag;
38158     ae_int_t k1;
38159     ae_int_t k2;
38160     ae_int_t k3;
38161     ae_int_t k4;
38162     double vt;
38163     ae_vector rswap4;
38164     ae_vector zswap4;
38165     ae_matrix ipivot44;
38166     ae_vector civ4;
38167     ae_vector crv4;
38168 
38169     ae_frame_make(_state, &_frame_block);
38170     memset(&_vselect, 0, sizeof(_vselect));
38171     memset(&x, 0, sizeof(x));
38172     memset(&work, 0, sizeof(work));
38173     memset(&temp, 0, sizeof(temp));
38174     memset(&temp11, 0, sizeof(temp11));
38175     memset(&temp22, 0, sizeof(temp22));
38176     memset(&temp11b, 0, sizeof(temp11b));
38177     memset(&temp21b, 0, sizeof(temp21b));
38178     memset(&temp12b, 0, sizeof(temp12b));
38179     memset(&temp22b, 0, sizeof(temp22b));
38180     memset(&rswap4, 0, sizeof(rswap4));
38181     memset(&zswap4, 0, sizeof(zswap4));
38182     memset(&ipivot44, 0, sizeof(ipivot44));
38183     memset(&civ4, 0, sizeof(civ4));
38184     memset(&crv4, 0, sizeof(crv4));
38185     ae_vector_init_copy(&_vselect, vselect, _state, ae_true);
38186     vselect = &_vselect;
38187     *m = 0;
38188     *info = 0;
38189     ae_matrix_init(&x, 0, 0, DT_REAL, _state, ae_true);
38190     ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
38191     ae_vector_init(&temp, 0, DT_REAL, _state, ae_true);
38192     ae_matrix_init(&temp11, 0, 0, DT_REAL, _state, ae_true);
38193     ae_matrix_init(&temp22, 0, 0, DT_REAL, _state, ae_true);
38194     ae_matrix_init(&temp11b, 0, 0, DT_REAL, _state, ae_true);
38195     ae_matrix_init(&temp21b, 0, 0, DT_REAL, _state, ae_true);
38196     ae_matrix_init(&temp12b, 0, 0, DT_REAL, _state, ae_true);
38197     ae_matrix_init(&temp22b, 0, 0, DT_REAL, _state, ae_true);
38198     ae_vector_init(&rswap4, 0, DT_BOOL, _state, ae_true);
38199     ae_vector_init(&zswap4, 0, DT_BOOL, _state, ae_true);
38200     ae_matrix_init(&ipivot44, 0, 0, DT_INT, _state, ae_true);
38201     ae_vector_init(&civ4, 0, DT_REAL, _state, ae_true);
38202     ae_vector_init(&crv4, 0, DT_REAL, _state, ae_true);
38203 
38204     ae_matrix_set_length(&x, 2+1, 2+1, _state);
38205     ae_matrix_set_length(&temp11, 1+1, 1+1, _state);
38206     ae_matrix_set_length(&temp11b, 1+1, 1+1, _state);
38207     ae_matrix_set_length(&temp21b, 2+1, 1+1, _state);
38208     ae_matrix_set_length(&temp12b, 1+1, 2+1, _state);
38209     ae_matrix_set_length(&temp22b, 2+1, 2+1, _state);
38210     ae_matrix_set_length(&temp22, 2+1, 2+1, _state);
38211     ae_vector_set_length(&work, 3*n+1, _state);
38212     ae_vector_set_length(&temp, n+1, _state);
38213     ae_vector_set_length(&rswap4, 4+1, _state);
38214     ae_vector_set_length(&zswap4, 4+1, _state);
38215     ae_matrix_set_length(&ipivot44, 4+1, 4+1, _state);
38216     ae_vector_set_length(&civ4, 4+1, _state);
38217     ae_vector_set_length(&crv4, 4+1, _state);
38218     if( howmny!=1 )
38219     {
38220         if( side==1||side==3 )
38221         {
38222             ae_matrix_set_length(vr, n+1, n+1, _state);
38223         }
38224         if( side==2||side==3 )
38225         {
38226             ae_matrix_set_length(vl, n+1, n+1, _state);
38227         }
38228     }
38229 
38230     /*
38231      * Decode and test the input parameters
38232      */
38233     bothv = side==3;
38234     rightv = side==1||bothv;
38235     leftv = side==2||bothv;
38236     allv = howmny==2;
38237     over = howmny==1;
38238     somev = howmny==3;
38239     *info = 0;
38240     if( n<0 )
38241     {
38242         *info = -2;
38243         ae_frame_leave(_state);
38244         return;
38245     }
38246     if( !rightv&&!leftv )
38247     {
38248         *info = -3;
38249         ae_frame_leave(_state);
38250         return;
38251     }
38252     if( (!allv&&!over)&&!somev )
38253     {
38254         *info = -4;
38255         ae_frame_leave(_state);
38256         return;
38257     }
38258 
38259     /*
38260      * Set M to the number of columns required to store the selected
38261      * eigenvectors, standardize the array SELECT if necessary, and
38262      * test MM.
38263      */
38264     if( somev )
38265     {
38266         *m = 0;
38267         pair = ae_false;
38268         for(j=1; j<=n; j++)
38269         {
38270             if( pair )
38271             {
38272                 pair = ae_false;
38273                 vselect->ptr.p_bool[j] = ae_false;
38274             }
38275             else
38276             {
38277                 if( j<n )
38278                 {
38279                     if( ae_fp_eq(t->ptr.pp_double[j+1][j],(double)(0)) )
38280                     {
38281                         if( vselect->ptr.p_bool[j] )
38282                         {
38283                             *m = *m+1;
38284                         }
38285                     }
38286                     else
38287                     {
38288                         pair = ae_true;
38289                         if( vselect->ptr.p_bool[j]||vselect->ptr.p_bool[j+1] )
38290                         {
38291                             vselect->ptr.p_bool[j] = ae_true;
38292                             *m = *m+2;
38293                         }
38294                     }
38295                 }
38296                 else
38297                 {
38298                     if( vselect->ptr.p_bool[n] )
38299                     {
38300                         *m = *m+1;
38301                     }
38302                 }
38303             }
38304         }
38305     }
38306     else
38307     {
38308         *m = n;
38309     }
38310 
38311     /*
38312      * Quick return if possible.
38313      */
38314     if( n==0 )
38315     {
38316         ae_frame_leave(_state);
38317         return;
38318     }
38319 
38320     /*
38321      * Set the constants to control overflow.
38322      */
38323     unfl = ae_minrealnumber;
38324     ulp = ae_machineepsilon;
38325     smlnum = unfl*(n/ulp);
38326     bignum = (1-ulp)/smlnum;
38327 
38328     /*
38329      * Compute 1-norm of each column of strictly upper triangular
38330      * part of T to control overflow in triangular solver.
38331      */
38332     work.ptr.p_double[1] = (double)(0);
38333     for(j=2; j<=n; j++)
38334     {
38335         work.ptr.p_double[j] = (double)(0);
38336         for(i=1; i<=j-1; i++)
38337         {
38338             work.ptr.p_double[j] = work.ptr.p_double[j]+ae_fabs(t->ptr.pp_double[i][j], _state);
38339         }
38340     }
38341 
38342     /*
38343      * Index IP is used to specify the real or complex eigenvalue:
38344      * IP = 0, real eigenvalue,
38345      *      1, first of conjugate complex pair: (wr,wi)
38346      *     -1, second of conjugate complex pair: (wr,wi)
38347      */
38348     n2 = 2*n;
38349     if( rightv )
38350     {
38351 
38352         /*
38353          * Compute right eigenvectors.
38354          */
38355         ip = 0;
38356         iis = *m;
38357         for(ki=n; ki>=1; ki--)
38358         {
38359             skipflag = ae_false;
38360             if( ip==1 )
38361             {
38362                 skipflag = ae_true;
38363             }
38364             else
38365             {
38366                 if( ki!=1 )
38367                 {
38368                     if( ae_fp_neq(t->ptr.pp_double[ki][ki-1],(double)(0)) )
38369                     {
38370                         ip = -1;
38371                     }
38372                 }
38373                 if( somev )
38374                 {
38375                     if( ip==0 )
38376                     {
38377                         if( !vselect->ptr.p_bool[ki] )
38378                         {
38379                             skipflag = ae_true;
38380                         }
38381                     }
38382                     else
38383                     {
38384                         if( !vselect->ptr.p_bool[ki-1] )
38385                         {
38386                             skipflag = ae_true;
38387                         }
38388                     }
38389                 }
38390             }
38391             if( !skipflag )
38392             {
38393 
38394                 /*
38395                  * Compute the KI-th eigenvalue (WR,WI).
38396                  */
38397                 wr = t->ptr.pp_double[ki][ki];
38398                 wi = (double)(0);
38399                 if( ip!=0 )
38400                 {
38401                     wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki-1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki-1][ki], _state), _state);
38402                 }
38403                 smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state);
38404                 if( ip==0 )
38405                 {
38406 
38407                     /*
38408                      * Real right eigenvector
38409                      */
38410                     work.ptr.p_double[ki+n] = (double)(1);
38411 
38412                     /*
38413                      * Form right-hand side
38414                      */
38415                     for(k=1; k<=ki-1; k++)
38416                     {
38417                         work.ptr.p_double[k+n] = -t->ptr.pp_double[k][ki];
38418                     }
38419 
38420                     /*
38421                      * Solve the upper quasi-triangular system:
38422                      *   (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
38423                      */
38424                     jnxt = ki-1;
38425                     for(j=ki-1; j>=1; j--)
38426                     {
38427                         if( j>jnxt )
38428                         {
38429                             continue;
38430                         }
38431                         j1 = j;
38432                         j2 = j;
38433                         jnxt = j-1;
38434                         if( j>1 )
38435                         {
38436                             if( ae_fp_neq(t->ptr.pp_double[j][j-1],(double)(0)) )
38437                             {
38438                                 j1 = j-1;
38439                                 jnxt = j-2;
38440                             }
38441                         }
38442                         if( j1==j2 )
38443                         {
38444 
38445                             /*
38446                              * 1-by-1 diagonal block
38447                              */
38448                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38449                             temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38450                             evd_internalhsevdlaln2(ae_false, 1, 1, smin, (double)(1), &temp11, 1.0, 1.0, &temp11b, wr, 0.0, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38451 
38452                             /*
38453                              * Scale X(1,1) to avoid overflow when updating
38454                              * the right-hand side.
38455                              */
38456                             if( ae_fp_greater(xnorm,(double)(1)) )
38457                             {
38458                                 if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) )
38459                                 {
38460                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
38461                                     scl = scl/xnorm;
38462                                 }
38463                             }
38464 
38465                             /*
38466                              * Scale if necessary
38467                              */
38468                             if( ae_fp_neq(scl,(double)(1)) )
38469                             {
38470                                 k1 = n+1;
38471                                 k2 = n+ki;
38472                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38473                             }
38474                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38475 
38476                             /*
38477                              * Update right-hand side
38478                              */
38479                             k1 = 1+n;
38480                             k2 = j-1+n;
38481                             k3 = j-1;
38482                             vt = -x.ptr.pp_double[1][1];
38483                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt);
38484                         }
38485                         else
38486                         {
38487 
38488                             /*
38489                              * 2-by-2 diagonal block
38490                              */
38491                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1];
38492                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j];
38493                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1];
38494                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j];
38495                             temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n];
38496                             temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+n];
38497                             evd_internalhsevdlaln2(ae_false, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, (double)(0), &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38498 
38499                             /*
38500                              * Scale X(1,1) and X(2,1) to avoid overflow when
38501                              * updating the right-hand side.
38502                              */
38503                             if( ae_fp_greater(xnorm,(double)(1)) )
38504                             {
38505                                 beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state);
38506                                 if( ae_fp_greater(beta,bignum/xnorm) )
38507                                 {
38508                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
38509                                     x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]/xnorm;
38510                                     scl = scl/xnorm;
38511                                 }
38512                             }
38513 
38514                             /*
38515                              * Scale if necessary
38516                              */
38517                             if( ae_fp_neq(scl,(double)(1)) )
38518                             {
38519                                 k1 = 1+n;
38520                                 k2 = ki+n;
38521                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38522                             }
38523                             work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1];
38524                             work.ptr.p_double[j+n] = x.ptr.pp_double[2][1];
38525 
38526                             /*
38527                              * Update right-hand side
38528                              */
38529                             k1 = 1+n;
38530                             k2 = j-2+n;
38531                             k3 = j-2;
38532                             k4 = j-1;
38533                             vt = -x.ptr.pp_double[1][1];
38534                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][k4], t->stride, ae_v_len(k1,k2), vt);
38535                             vt = -x.ptr.pp_double[2][1];
38536                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(k1,k2), vt);
38537                         }
38538                     }
38539 
38540                     /*
38541                      * Copy the vector x or Q*x to VR and normalize.
38542                      */
38543                     if( !over )
38544                     {
38545                         k1 = 1+n;
38546                         k2 = ki+n;
38547                         ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[k1], 1, ae_v_len(1,ki));
38548                         ii = columnidxabsmax(vr, 1, ki, iis, _state);
38549                         remax = 1/ae_fabs(vr->ptr.pp_double[ii][iis], _state);
38550                         ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax);
38551                         for(k=ki+1; k<=n; k++)
38552                         {
38553                             vr->ptr.pp_double[k][iis] = (double)(0);
38554                         }
38555                     }
38556                     else
38557                     {
38558                         if( ki>1 )
38559                         {
38560                             ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n));
38561                             matrixvectormultiply(vr, 1, n, 1, ki-1, ae_false, &work, 1+n, ki-1+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
38562                             ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38563                         }
38564                         ii = columnidxabsmax(vr, 1, n, ki, _state);
38565                         remax = 1/ae_fabs(vr->ptr.pp_double[ii][ki], _state);
38566                         ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax);
38567                     }
38568                 }
38569                 else
38570                 {
38571 
38572                     /*
38573                      * Complex right eigenvector.
38574                      *
38575                      * Initial solve
38576                      *     [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
38577                      *     [ (T(KI,KI-1)   T(KI,KI)   )               ]
38578                      */
38579                     if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki-1][ki], _state),ae_fabs(t->ptr.pp_double[ki][ki-1], _state)) )
38580                     {
38581                         work.ptr.p_double[ki-1+n] = (double)(1);
38582                         work.ptr.p_double[ki+n2] = wi/t->ptr.pp_double[ki-1][ki];
38583                     }
38584                     else
38585                     {
38586                         work.ptr.p_double[ki-1+n] = -wi/t->ptr.pp_double[ki][ki-1];
38587                         work.ptr.p_double[ki+n2] = (double)(1);
38588                     }
38589                     work.ptr.p_double[ki+n] = (double)(0);
38590                     work.ptr.p_double[ki-1+n2] = (double)(0);
38591 
38592                     /*
38593                      * Form right-hand side
38594                      */
38595                     for(k=1; k<=ki-2; k++)
38596                     {
38597                         work.ptr.p_double[k+n] = -work.ptr.p_double[ki-1+n]*t->ptr.pp_double[k][ki-1];
38598                         work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+n2]*t->ptr.pp_double[k][ki];
38599                     }
38600 
38601                     /*
38602                      * Solve upper quasi-triangular system:
38603                      * (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
38604                      */
38605                     jnxt = ki-2;
38606                     for(j=ki-2; j>=1; j--)
38607                     {
38608                         if( j>jnxt )
38609                         {
38610                             continue;
38611                         }
38612                         j1 = j;
38613                         j2 = j;
38614                         jnxt = j-1;
38615                         if( j>1 )
38616                         {
38617                             if( ae_fp_neq(t->ptr.pp_double[j][j-1],(double)(0)) )
38618                             {
38619                                 j1 = j-1;
38620                                 jnxt = j-2;
38621                             }
38622                         }
38623                         if( j1==j2 )
38624                         {
38625 
38626                             /*
38627                              * 1-by-1 diagonal block
38628                              */
38629                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38630                             temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38631                             temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
38632                             evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38633 
38634                             /*
38635                              * Scale X(1,1) and X(1,2) to avoid overflow when
38636                              * updating the right-hand side.
38637                              */
38638                             if( ae_fp_greater(xnorm,(double)(1)) )
38639                             {
38640                                 if( ae_fp_greater(work.ptr.p_double[j],bignum/xnorm) )
38641                                 {
38642                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]/xnorm;
38643                                     x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]/xnorm;
38644                                     scl = scl/xnorm;
38645                                 }
38646                             }
38647 
38648                             /*
38649                              * Scale if necessary
38650                              */
38651                             if( ae_fp_neq(scl,(double)(1)) )
38652                             {
38653                                 k1 = 1+n;
38654                                 k2 = ki+n;
38655                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38656                                 k1 = 1+n2;
38657                                 k2 = ki+n2;
38658                                 ae_v_muld(&work.ptr.p_double[k1], 1, ae_v_len(k1,k2), scl);
38659                             }
38660                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38661                             work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
38662 
38663                             /*
38664                              * Update the right-hand side
38665                              */
38666                             k1 = 1+n;
38667                             k2 = j-1+n;
38668                             k3 = 1;
38669                             k4 = j-1;
38670                             vt = -x.ptr.pp_double[1][1];
38671                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt);
38672                             k1 = 1+n2;
38673                             k2 = j-1+n2;
38674                             k3 = 1;
38675                             k4 = j-1;
38676                             vt = -x.ptr.pp_double[1][2];
38677                             ae_v_addd(&work.ptr.p_double[k1], 1, &t->ptr.pp_double[k3][j], t->stride, ae_v_len(k1,k2), vt);
38678                         }
38679                         else
38680                         {
38681 
38682                             /*
38683                              * 2-by-2 diagonal block
38684                              */
38685                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j-1][j-1];
38686                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j-1][j];
38687                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j][j-1];
38688                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j][j];
38689                             temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j-1+n];
38690                             temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j-1+n+n];
38691                             temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+n];
38692                             temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+n+n];
38693                             evd_internalhsevdlaln2(ae_false, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38694 
38695                             /*
38696                              * Scale X to avoid overflow when updating
38697                              * the right-hand side.
38698                              */
38699                             if( ae_fp_greater(xnorm,(double)(1)) )
38700                             {
38701                                 beta = ae_maxreal(work.ptr.p_double[j-1], work.ptr.p_double[j], _state);
38702                                 if( ae_fp_greater(beta,bignum/xnorm) )
38703                                 {
38704                                     rec = 1/xnorm;
38705                                     x.ptr.pp_double[1][1] = x.ptr.pp_double[1][1]*rec;
38706                                     x.ptr.pp_double[1][2] = x.ptr.pp_double[1][2]*rec;
38707                                     x.ptr.pp_double[2][1] = x.ptr.pp_double[2][1]*rec;
38708                                     x.ptr.pp_double[2][2] = x.ptr.pp_double[2][2]*rec;
38709                                     scl = scl*rec;
38710                                 }
38711                             }
38712 
38713                             /*
38714                              * Scale if necessary
38715                              */
38716                             if( ae_fp_neq(scl,(double)(1)) )
38717                             {
38718                                 ae_v_muld(&work.ptr.p_double[1+n], 1, ae_v_len(1+n,ki+n), scl);
38719                                 ae_v_muld(&work.ptr.p_double[1+n2], 1, ae_v_len(1+n2,ki+n2), scl);
38720                             }
38721                             work.ptr.p_double[j-1+n] = x.ptr.pp_double[1][1];
38722                             work.ptr.p_double[j+n] = x.ptr.pp_double[2][1];
38723                             work.ptr.p_double[j-1+n2] = x.ptr.pp_double[1][2];
38724                             work.ptr.p_double[j+n2] = x.ptr.pp_double[2][2];
38725 
38726                             /*
38727                              * Update the right-hand side
38728                              */
38729                             vt = -x.ptr.pp_double[1][1];
38730                             ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n+1,n+j-2), vt);
38731                             vt = -x.ptr.pp_double[2][1];
38732                             ae_v_addd(&work.ptr.p_double[n+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n+1,n+j-2), vt);
38733                             vt = -x.ptr.pp_double[1][2];
38734                             ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j-1], t->stride, ae_v_len(n2+1,n2+j-2), vt);
38735                             vt = -x.ptr.pp_double[2][2];
38736                             ae_v_addd(&work.ptr.p_double[n2+1], 1, &t->ptr.pp_double[1][j], t->stride, ae_v_len(n2+1,n2+j-2), vt);
38737                         }
38738                     }
38739 
38740                     /*
38741                      * Copy the vector x or Q*x to VR and normalize.
38742                      */
38743                     if( !over )
38744                     {
38745                         ae_v_move(&vr->ptr.pp_double[1][iis-1], vr->stride, &work.ptr.p_double[n+1], 1, ae_v_len(1,ki));
38746                         ae_v_move(&vr->ptr.pp_double[1][iis], vr->stride, &work.ptr.p_double[n2+1], 1, ae_v_len(1,ki));
38747                         emax = (double)(0);
38748                         for(k=1; k<=ki; k++)
38749                         {
38750                             emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][iis-1], _state)+ae_fabs(vr->ptr.pp_double[k][iis], _state), _state);
38751                         }
38752                         remax = 1/emax;
38753                         ae_v_muld(&vr->ptr.pp_double[1][iis-1], vr->stride, ae_v_len(1,ki), remax);
38754                         ae_v_muld(&vr->ptr.pp_double[1][iis], vr->stride, ae_v_len(1,ki), remax);
38755                         for(k=ki+1; k<=n; k++)
38756                         {
38757                             vr->ptr.pp_double[k][iis-1] = (double)(0);
38758                             vr->ptr.pp_double[k][iis] = (double)(0);
38759                         }
38760                     }
38761                     else
38762                     {
38763                         if( ki>2 )
38764                         {
38765                             ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n));
38766                             matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n, ki-2+n, 1.0, &temp, 1, n, work.ptr.p_double[ki-1+n], _state);
38767                             ae_v_move(&vr->ptr.pp_double[1][ki-1], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38768                             ae_v_move(&temp.ptr.p_double[1], 1, &vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n));
38769                             matrixvectormultiply(vr, 1, n, 1, ki-2, ae_false, &work, 1+n2, ki-2+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+n2], _state);
38770                             ae_v_move(&vr->ptr.pp_double[1][ki], vr->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38771                         }
38772                         else
38773                         {
38774                             vt = work.ptr.p_double[ki-1+n];
38775                             ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), vt);
38776                             vt = work.ptr.p_double[ki+n2];
38777                             ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), vt);
38778                         }
38779                         emax = (double)(0);
38780                         for(k=1; k<=n; k++)
38781                         {
38782                             emax = ae_maxreal(emax, ae_fabs(vr->ptr.pp_double[k][ki-1], _state)+ae_fabs(vr->ptr.pp_double[k][ki], _state), _state);
38783                         }
38784                         remax = 1/emax;
38785                         ae_v_muld(&vr->ptr.pp_double[1][ki-1], vr->stride, ae_v_len(1,n), remax);
38786                         ae_v_muld(&vr->ptr.pp_double[1][ki], vr->stride, ae_v_len(1,n), remax);
38787                     }
38788                 }
38789                 iis = iis-1;
38790                 if( ip!=0 )
38791                 {
38792                     iis = iis-1;
38793                 }
38794             }
38795             if( ip==1 )
38796             {
38797                 ip = 0;
38798             }
38799             if( ip==-1 )
38800             {
38801                 ip = 1;
38802             }
38803         }
38804     }
38805     if( leftv )
38806     {
38807 
38808         /*
38809          * Compute left eigenvectors.
38810          */
38811         ip = 0;
38812         iis = 1;
38813         for(ki=1; ki<=n; ki++)
38814         {
38815             skipflag = ae_false;
38816             if( ip==-1 )
38817             {
38818                 skipflag = ae_true;
38819             }
38820             else
38821             {
38822                 if( ki!=n )
38823                 {
38824                     if( ae_fp_neq(t->ptr.pp_double[ki+1][ki],(double)(0)) )
38825                     {
38826                         ip = 1;
38827                     }
38828                 }
38829                 if( somev )
38830                 {
38831                     if( !vselect->ptr.p_bool[ki] )
38832                     {
38833                         skipflag = ae_true;
38834                     }
38835                 }
38836             }
38837             if( !skipflag )
38838             {
38839 
38840                 /*
38841                  * Compute the KI-th eigenvalue (WR,WI).
38842                  */
38843                 wr = t->ptr.pp_double[ki][ki];
38844                 wi = (double)(0);
38845                 if( ip!=0 )
38846                 {
38847                     wi = ae_sqrt(ae_fabs(t->ptr.pp_double[ki][ki+1], _state), _state)*ae_sqrt(ae_fabs(t->ptr.pp_double[ki+1][ki], _state), _state);
38848                 }
38849                 smin = ae_maxreal(ulp*(ae_fabs(wr, _state)+ae_fabs(wi, _state)), smlnum, _state);
38850                 if( ip==0 )
38851                 {
38852 
38853                     /*
38854                      * Real left eigenvector.
38855                      */
38856                     work.ptr.p_double[ki+n] = (double)(1);
38857 
38858                     /*
38859                      * Form right-hand side
38860                      */
38861                     for(k=ki+1; k<=n; k++)
38862                     {
38863                         work.ptr.p_double[k+n] = -t->ptr.pp_double[ki][k];
38864                     }
38865 
38866                     /*
38867                      * Solve the quasi-triangular system:
38868                      * (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
38869                      */
38870                     vmax = (double)(1);
38871                     vcrit = bignum;
38872                     jnxt = ki+1;
38873                     for(j=ki+1; j<=n; j++)
38874                     {
38875                         if( j<jnxt )
38876                         {
38877                             continue;
38878                         }
38879                         j1 = j;
38880                         j2 = j;
38881                         jnxt = j+1;
38882                         if( j<n )
38883                         {
38884                             if( ae_fp_neq(t->ptr.pp_double[j+1][j],(double)(0)) )
38885                             {
38886                                 j2 = j+1;
38887                                 jnxt = j+2;
38888                             }
38889                         }
38890                         if( j1==j2 )
38891                         {
38892 
38893                             /*
38894                              * 1-by-1 diagonal block
38895                              *
38896                              * Scale if necessary to avoid overflow when forming
38897                              * the right-hand side.
38898                              */
38899                             if( ae_fp_greater(work.ptr.p_double[j],vcrit) )
38900                             {
38901                                 rec = 1/vmax;
38902                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
38903                                 vmax = (double)(1);
38904                                 vcrit = bignum;
38905                             }
38906                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
38907                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
38908 
38909                             /*
38910                              * Solve (T(J,J)-WR)'*X = WORK
38911                              */
38912                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38913                             temp11b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38914                             evd_internalhsevdlaln2(ae_false, 1, 1, smin, 1.0, &temp11, 1.0, 1.0, &temp11b, wr, (double)(0), &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38915 
38916                             /*
38917                              * Scale if necessary
38918                              */
38919                             if( ae_fp_neq(scl,(double)(1)) )
38920                             {
38921                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
38922                             }
38923                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38924                             vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), vmax, _state);
38925                             vcrit = bignum/vmax;
38926                         }
38927                         else
38928                         {
38929 
38930                             /*
38931                              * 2-by-2 diagonal block
38932                              *
38933                              * Scale if necessary to avoid overflow when forming
38934                              * the right-hand side.
38935                              */
38936                             beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state);
38937                             if( ae_fp_greater(beta,vcrit) )
38938                             {
38939                                 rec = 1/vmax;
38940                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
38941                                 vmax = (double)(1);
38942                                 vcrit = bignum;
38943                             }
38944                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
38945                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
38946                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+1][j+1], t->stride, &work.ptr.p_double[ki+1+n], 1, ae_v_len(ki+1,j-1));
38947                             work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt;
38948 
38949                             /*
38950                              * Solve
38951                              *    [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
38952                              *    [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
38953                              */
38954                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
38955                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1];
38956                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j];
38957                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1];
38958                             temp21b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
38959                             temp21b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n];
38960                             evd_internalhsevdlaln2(ae_true, 2, 1, smin, 1.0, &temp22, 1.0, 1.0, &temp21b, wr, (double)(0), &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
38961 
38962                             /*
38963                              * Scale if necessary
38964                              */
38965                             if( ae_fp_neq(scl,(double)(1)) )
38966                             {
38967                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
38968                             }
38969                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
38970                             work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1];
38971                             vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+1+n], _state), vmax, _state), _state);
38972                             vcrit = bignum/vmax;
38973                         }
38974                     }
38975 
38976                     /*
38977                      * Copy the vector x or Q*x to VL and normalize.
38978                      */
38979                     if( !over )
38980                     {
38981                         ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n));
38982                         ii = columnidxabsmax(vl, ki, n, iis, _state);
38983                         remax = 1/ae_fabs(vl->ptr.pp_double[ii][iis], _state);
38984                         ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax);
38985                         for(k=1; k<=ki-1; k++)
38986                         {
38987                             vl->ptr.pp_double[k][iis] = (double)(0);
38988                         }
38989                     }
38990                     else
38991                     {
38992                         if( ki<n )
38993                         {
38994                             ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n));
38995                             matrixvectormultiply(vl, 1, n, ki+1, n, ae_false, &work, ki+1+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
38996                             ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
38997                         }
38998                         ii = columnidxabsmax(vl, 1, n, ki, _state);
38999                         remax = 1/ae_fabs(vl->ptr.pp_double[ii][ki], _state);
39000                         ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax);
39001                     }
39002                 }
39003                 else
39004                 {
39005 
39006                     /*
39007                      * Complex left eigenvector.
39008                      *
39009                      * Initial solve:
39010                      *   ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
39011                      *   ((T(KI+1,KI) T(KI+1,KI+1))                )
39012                      */
39013                     if( ae_fp_greater_eq(ae_fabs(t->ptr.pp_double[ki][ki+1], _state),ae_fabs(t->ptr.pp_double[ki+1][ki], _state)) )
39014                     {
39015                         work.ptr.p_double[ki+n] = wi/t->ptr.pp_double[ki][ki+1];
39016                         work.ptr.p_double[ki+1+n2] = (double)(1);
39017                     }
39018                     else
39019                     {
39020                         work.ptr.p_double[ki+n] = (double)(1);
39021                         work.ptr.p_double[ki+1+n2] = -wi/t->ptr.pp_double[ki+1][ki];
39022                     }
39023                     work.ptr.p_double[ki+1+n] = (double)(0);
39024                     work.ptr.p_double[ki+n2] = (double)(0);
39025 
39026                     /*
39027                      * Form right-hand side
39028                      */
39029                     for(k=ki+2; k<=n; k++)
39030                     {
39031                         work.ptr.p_double[k+n] = -work.ptr.p_double[ki+n]*t->ptr.pp_double[ki][k];
39032                         work.ptr.p_double[k+n2] = -work.ptr.p_double[ki+1+n2]*t->ptr.pp_double[ki+1][k];
39033                     }
39034 
39035                     /*
39036                      * Solve complex quasi-triangular system:
39037                      * ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
39038                      */
39039                     vmax = (double)(1);
39040                     vcrit = bignum;
39041                     jnxt = ki+2;
39042                     for(j=ki+2; j<=n; j++)
39043                     {
39044                         if( j<jnxt )
39045                         {
39046                             continue;
39047                         }
39048                         j1 = j;
39049                         j2 = j;
39050                         jnxt = j+1;
39051                         if( j<n )
39052                         {
39053                             if( ae_fp_neq(t->ptr.pp_double[j+1][j],(double)(0)) )
39054                             {
39055                                 j2 = j+1;
39056                                 jnxt = j+2;
39057                             }
39058                         }
39059                         if( j1==j2 )
39060                         {
39061 
39062                             /*
39063                              * 1-by-1 diagonal block
39064                              *
39065                              * Scale if necessary to avoid overflow when
39066                              * forming the right-hand side elements.
39067                              */
39068                             if( ae_fp_greater(work.ptr.p_double[j],vcrit) )
39069                             {
39070                                 rec = 1/vmax;
39071                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
39072                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec);
39073                                 vmax = (double)(1);
39074                                 vcrit = bignum;
39075                             }
39076                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
39077                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
39078                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
39079                             work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt;
39080 
39081                             /*
39082                              * Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
39083                              */
39084                             temp11.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
39085                             temp12b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
39086                             temp12b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
39087                             evd_internalhsevdlaln2(ae_false, 1, 2, smin, 1.0, &temp11, 1.0, 1.0, &temp12b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
39088 
39089                             /*
39090                              * Scale if necessary
39091                              */
39092                             if( ae_fp_neq(scl,(double)(1)) )
39093                             {
39094                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
39095                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl);
39096                             }
39097                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
39098                             work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
39099                             vmax = ae_maxreal(ae_fabs(work.ptr.p_double[j+n], _state), ae_maxreal(ae_fabs(work.ptr.p_double[j+n2], _state), vmax, _state), _state);
39100                             vcrit = bignum/vmax;
39101                         }
39102                         else
39103                         {
39104 
39105                             /*
39106                              * 2-by-2 diagonal block
39107                              *
39108                              * Scale if necessary to avoid overflow when forming
39109                              * the right-hand side elements.
39110                              */
39111                             beta = ae_maxreal(work.ptr.p_double[j], work.ptr.p_double[j+1], _state);
39112                             if( ae_fp_greater(beta,vcrit) )
39113                             {
39114                                 rec = 1/vmax;
39115                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), rec);
39116                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), rec);
39117                                 vmax = (double)(1);
39118                                 vcrit = bignum;
39119                             }
39120                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
39121                             work.ptr.p_double[j+n] = work.ptr.p_double[j+n]-vt;
39122                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
39123                             work.ptr.p_double[j+n2] = work.ptr.p_double[j+n2]-vt;
39124                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n], 1, ae_v_len(ki+2,j-1));
39125                             work.ptr.p_double[j+1+n] = work.ptr.p_double[j+1+n]-vt;
39126                             vt = ae_v_dotproduct(&t->ptr.pp_double[ki+2][j+1], t->stride, &work.ptr.p_double[ki+2+n2], 1, ae_v_len(ki+2,j-1));
39127                             work.ptr.p_double[j+1+n2] = work.ptr.p_double[j+1+n2]-vt;
39128 
39129                             /*
39130                              * Solve 2-by-2 complex linear equation
39131                              *   ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
39132                              *   ([T(j+1,j) T(j+1,j+1)]             )
39133                              */
39134                             temp22.ptr.pp_double[1][1] = t->ptr.pp_double[j][j];
39135                             temp22.ptr.pp_double[1][2] = t->ptr.pp_double[j][j+1];
39136                             temp22.ptr.pp_double[2][1] = t->ptr.pp_double[j+1][j];
39137                             temp22.ptr.pp_double[2][2] = t->ptr.pp_double[j+1][j+1];
39138                             temp22b.ptr.pp_double[1][1] = work.ptr.p_double[j+n];
39139                             temp22b.ptr.pp_double[1][2] = work.ptr.p_double[j+n+n];
39140                             temp22b.ptr.pp_double[2][1] = work.ptr.p_double[j+1+n];
39141                             temp22b.ptr.pp_double[2][2] = work.ptr.p_double[j+1+n+n];
39142                             evd_internalhsevdlaln2(ae_true, 2, 2, smin, 1.0, &temp22, 1.0, 1.0, &temp22b, wr, -wi, &rswap4, &zswap4, &ipivot44, &civ4, &crv4, &x, &scl, &xnorm, &ierr, _state);
39143 
39144                             /*
39145                              * Scale if necessary
39146                              */
39147                             if( ae_fp_neq(scl,(double)(1)) )
39148                             {
39149                                 ae_v_muld(&work.ptr.p_double[ki+n], 1, ae_v_len(ki+n,n+n), scl);
39150                                 ae_v_muld(&work.ptr.p_double[ki+n2], 1, ae_v_len(ki+n2,n+n2), scl);
39151                             }
39152                             work.ptr.p_double[j+n] = x.ptr.pp_double[1][1];
39153                             work.ptr.p_double[j+n2] = x.ptr.pp_double[1][2];
39154                             work.ptr.p_double[j+1+n] = x.ptr.pp_double[2][1];
39155                             work.ptr.p_double[j+1+n2] = x.ptr.pp_double[2][2];
39156                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][1], _state), vmax, _state);
39157                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[1][2], _state), vmax, _state);
39158                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][1], _state), vmax, _state);
39159                             vmax = ae_maxreal(ae_fabs(x.ptr.pp_double[2][2], _state), vmax, _state);
39160                             vcrit = bignum/vmax;
39161                         }
39162                     }
39163 
39164                     /*
39165                      * Copy the vector x or Q*x to VL and normalize.
39166                      */
39167                     if( !over )
39168                     {
39169                         ae_v_move(&vl->ptr.pp_double[ki][iis], vl->stride, &work.ptr.p_double[ki+n], 1, ae_v_len(ki,n));
39170                         ae_v_move(&vl->ptr.pp_double[ki][iis+1], vl->stride, &work.ptr.p_double[ki+n2], 1, ae_v_len(ki,n));
39171                         emax = (double)(0);
39172                         for(k=ki; k<=n; k++)
39173                         {
39174                             emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][iis], _state)+ae_fabs(vl->ptr.pp_double[k][iis+1], _state), _state);
39175                         }
39176                         remax = 1/emax;
39177                         ae_v_muld(&vl->ptr.pp_double[ki][iis], vl->stride, ae_v_len(ki,n), remax);
39178                         ae_v_muld(&vl->ptr.pp_double[ki][iis+1], vl->stride, ae_v_len(ki,n), remax);
39179                         for(k=1; k<=ki-1; k++)
39180                         {
39181                             vl->ptr.pp_double[k][iis] = (double)(0);
39182                             vl->ptr.pp_double[k][iis+1] = (double)(0);
39183                         }
39184                     }
39185                     else
39186                     {
39187                         if( ki<n-1 )
39188                         {
39189                             ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n));
39190                             matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n, n+n, 1.0, &temp, 1, n, work.ptr.p_double[ki+n], _state);
39191                             ae_v_move(&vl->ptr.pp_double[1][ki], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
39192                             ae_v_move(&temp.ptr.p_double[1], 1, &vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n));
39193                             matrixvectormultiply(vl, 1, n, ki+2, n, ae_false, &work, ki+2+n2, n+n2, 1.0, &temp, 1, n, work.ptr.p_double[ki+1+n2], _state);
39194                             ae_v_move(&vl->ptr.pp_double[1][ki+1], vl->stride, &temp.ptr.p_double[1], 1, ae_v_len(1,n));
39195                         }
39196                         else
39197                         {
39198                             vt = work.ptr.p_double[ki+n];
39199                             ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), vt);
39200                             vt = work.ptr.p_double[ki+1+n2];
39201                             ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), vt);
39202                         }
39203                         emax = (double)(0);
39204                         for(k=1; k<=n; k++)
39205                         {
39206                             emax = ae_maxreal(emax, ae_fabs(vl->ptr.pp_double[k][ki], _state)+ae_fabs(vl->ptr.pp_double[k][ki+1], _state), _state);
39207                         }
39208                         remax = 1/emax;
39209                         ae_v_muld(&vl->ptr.pp_double[1][ki], vl->stride, ae_v_len(1,n), remax);
39210                         ae_v_muld(&vl->ptr.pp_double[1][ki+1], vl->stride, ae_v_len(1,n), remax);
39211                     }
39212                 }
39213                 iis = iis+1;
39214                 if( ip!=0 )
39215                 {
39216                     iis = iis+1;
39217                 }
39218             }
39219             if( ip==-1 )
39220             {
39221                 ip = 0;
39222             }
39223             if( ip==1 )
39224             {
39225                 ip = -1;
39226             }
39227         }
39228     }
39229     ae_frame_leave(_state);
39230 }
39231 
39232 
39233 /*************************************************************************
39234 DLALN2 solves a system of the form  (ca A - w D ) X = s B
39235 or (ca A' - w D) X = s B   with possible scaling ("s") and
39236 perturbation of A.  (A' means A-transpose.)
39237 
39238 A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
39239 real diagonal matrix, w is a real or complex value, and X and B are
39240 NA x 1 matrices -- real if w is real, complex if w is complex.  NA
39241 may be 1 or 2.
39242 
39243 If w is complex, X and B are represented as NA x 2 matrices,
39244 the first column of each being the real part and the second
39245 being the imaginary part.
39246 
39247 "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
39248 so chosen that X can be computed without overflow.  X is further
39249 scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
39250 than overflow.
39251 
39252 If both singular values of (ca A - w D) are less than SMIN,
39253 SMIN*identity will be used instead of (ca A - w D).  If only one
39254 singular value is less than SMIN, one element of (ca A - w D) will be
39255 perturbed enough to make the smallest singular value roughly SMIN.
39256 If both singular values are at least SMIN, (ca A - w D) will not be
39257 perturbed.  In any case, the perturbation will be at most some small
39258 multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
39259 are computed by infinity-norm approximations, and thus will only be
39260 correct to a factor of 2 or so.
39261 
39262 Note: all input quantities are assumed to be smaller than overflow
39263 by a reasonable factor.  (See BIGNUM.)
39264 
39265   -- LAPACK auxiliary routine (version 3.0) --
39266      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
39267      Courant Institute, Argonne National Lab, and Rice University
39268      October 31, 1992
39269 *************************************************************************/
static void evd_internalhsevdlaln2(ae_bool ltrans,
     ae_int_t na,
     ae_int_t nw,
     double smin,
     double ca,
     /* Real    */ ae_matrix* a,
     double d1,
     double d2,
     /* Real    */ ae_matrix* b,
     double wr,
     double wi,
     /* Boolean */ ae_vector* rswap4,
     /* Boolean */ ae_vector* zswap4,
     /* Integer */ ae_matrix* ipivot44,
     /* Real    */ ae_vector* civ4,
     /* Real    */ ae_vector* crv4,
     /* Real    */ ae_matrix* x,
     double* scl,
     double* xnorm,
     ae_int_t* info,
     ae_state *_state)
{
    ae_int_t icmax;
    ae_int_t j;
    double bbnd;
    double bi1;
    double bi2;
    double bignum;
    double bnorm;
    double br1;
    double br2;
    double ci21;
    double ci22;
    double cmax;
    double cnorm;
    double cr21;
    double cr22;
    double csi;
    double csr;
    double li21;
    double lr21;
    double smini;
    double smlnum;
    double temp;
    double u22abs;
    double ui11;
    double ui11r;
    double ui12;
    double ui12s;
    double ui22;
    double ur11;
    double ur11r;
    double ur12;
    double ur12s;
    double ur22;
    double xi1;
    double xi2;
    double xr1;
    double xr2;
    double tmp1;
    double tmp2;

    /*
     * Pre-initialize output parameters
     */
    *scl = 0;
    *xnorm = 0;
    *info = 0;

    /*
     * Lookup tables for the 2x2 Gaussian elimination with complete
     * pivoting (same data as LAPACK DLALN2's ZSWAP/RSWAP/IPIVOT).
     * For each possible pivot position ICMAX in 1..4 (column-major
     * index into the 2x2 matrix C):
     *   ZSWAP4[ICMAX]  - whether the two solution components must be
     *                    swapped back after the solve,
     *   RSWAP4[ICMAX]  - whether the two right-hand side rows must be
     *                    swapped before the solve,
     *   column ICMAX of IPIVOT44 - order in which the four elements of
     *                    C are consumed: pivot, element in the pivot's
     *                    column, element in the pivot's row, remaining
     *                    element.
     */
    zswap4->ptr.p_bool[1] = ae_false;
    zswap4->ptr.p_bool[2] = ae_false;
    zswap4->ptr.p_bool[3] = ae_true;
    zswap4->ptr.p_bool[4] = ae_true;
    rswap4->ptr.p_bool[1] = ae_false;
    rswap4->ptr.p_bool[2] = ae_true;
    rswap4->ptr.p_bool[3] = ae_false;
    rswap4->ptr.p_bool[4] = ae_true;
    ipivot44->ptr.pp_int[1][1] = 1;
    ipivot44->ptr.pp_int[2][1] = 2;
    ipivot44->ptr.pp_int[3][1] = 3;
    ipivot44->ptr.pp_int[4][1] = 4;
    ipivot44->ptr.pp_int[1][2] = 2;
    ipivot44->ptr.pp_int[2][2] = 1;
    ipivot44->ptr.pp_int[3][2] = 4;
    ipivot44->ptr.pp_int[4][2] = 3;
    ipivot44->ptr.pp_int[1][3] = 3;
    ipivot44->ptr.pp_int[2][3] = 4;
    ipivot44->ptr.pp_int[3][3] = 1;
    ipivot44->ptr.pp_int[4][3] = 2;
    ipivot44->ptr.pp_int[1][4] = 4;
    ipivot44->ptr.pp_int[2][4] = 3;
    ipivot44->ptr.pp_int[3][4] = 2;
    ipivot44->ptr.pp_int[4][4] = 1;

    /*
     * SMLNUM - smallest magnitude that can safely be inverted,
     * BIGNUM - its reciprocal (overflow threshold),
     * SMINI  - effective lower bound for pivots/diagonals.
     */
    smlnum = 2*ae_minrealnumber;
    bignum = 1/smlnum;
    smini = ae_maxreal(smin, smlnum, _state);

    /*
     * Don't check for input errors
     */
    *info = 0;

    /*
     * Standard Initializations
     */
    *scl = (double)(1);
    if( na==1 )
    {

        /*
         * 1 x 1  (i.e., scalar) system   C X = B
         */
        if( nw==1 )
        {

            /*
             * Real 1x1 system.
             *
             * C = ca A - w D
             */
            csr = ca*a->ptr.pp_double[1][1]-wr*d1;
            cnorm = ae_fabs(csr, _state);

            /*
             * If | C | < SMINI, use C = SMINI
             */
            if( ae_fp_less(cnorm,smini) )
            {
                csr = smini;
                cnorm = smini;
                *info = 1;
            }

            /*
             * Check scaling for  X = B / C
             * (downscale B if the quotient would overflow)
             */
            bnorm = ae_fabs(b->ptr.pp_double[1][1], _state);
            if( ae_fp_less(cnorm,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
            {
                if( ae_fp_greater(bnorm,bignum*cnorm) )
                {
                    *scl = 1/bnorm;
                }
            }

            /*
             * Compute X
             */
            x->ptr.pp_double[1][1] = b->ptr.pp_double[1][1]*(*scl)/csr;
            *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state);
        }
        else
        {

            /*
             * Complex 1x1 system (w is complex)
             *
             * C = ca A - w D
             */
            csr = ca*a->ptr.pp_double[1][1]-wr*d1;
            csi = -wi*d1;
            cnorm = ae_fabs(csr, _state)+ae_fabs(csi, _state);

            /*
             * If | C | < SMINI, use C = SMINI
             */
            if( ae_fp_less(cnorm,smini) )
            {
                csr = smini;
                csi = (double)(0);
                cnorm = smini;
                *info = 1;
            }

            /*
             * Check scaling for  X = B / C
             * (downscale B if the quotient would overflow)
             */
            bnorm = ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state);
            if( ae_fp_less(cnorm,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
            {
                if( ae_fp_greater(bnorm,bignum*cnorm) )
                {
                    *scl = 1/bnorm;
                }
            }

            /*
             * Compute X, using robust complex division of scl*B by C
             */
            evd_internalhsevdladiv(*scl*b->ptr.pp_double[1][1], *scl*b->ptr.pp_double[1][2], csr, csi, &tmp1, &tmp2, _state);
            x->ptr.pp_double[1][1] = tmp1;
            x->ptr.pp_double[1][2] = tmp2;
            *xnorm = ae_fabs(x->ptr.pp_double[1][1], _state)+ae_fabs(x->ptr.pp_double[1][2], _state);
        }
    }
    else
    {

        /*
         * 2x2 System
         *
         * Compute the real part of  C = ca A - w D  (or  ca A' - w D )
         *
         * CRV4/CIV4 hold the real/imaginary parts of C packed in
         * column-major order: index 1 -> C(1,1), 2 -> C(2,1),
         * 3 -> C(1,2), 4 -> C(2,2) (hence 1+0, 2+0, 1+2, 2+2 below).
         */
        crv4->ptr.p_double[1+0] = ca*a->ptr.pp_double[1][1]-wr*d1;
        crv4->ptr.p_double[2+2] = ca*a->ptr.pp_double[2][2]-wr*d2;
        if( ltrans )
        {
            crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[2][1];
            crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[1][2];
        }
        else
        {
            crv4->ptr.p_double[2+0] = ca*a->ptr.pp_double[2][1];
            crv4->ptr.p_double[1+2] = ca*a->ptr.pp_double[1][2];
        }
        if( nw==1 )
        {

            /*
             * Real 2x2 system  (w is real)
             *
             * Find the largest element in C
             */
            cmax = (double)(0);
            icmax = 0;
            for(j=1; j<=4; j++)
            {
                if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state),cmax) )
                {
                    cmax = ae_fabs(crv4->ptr.p_double[j], _state);
                    icmax = j;
                }
            }

            /*
             * If norm(C) < SMINI, use SMINI*identity.
             */
            if( ae_fp_less(cmax,smini) )
            {
                bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state), ae_fabs(b->ptr.pp_double[2][1], _state), _state);
                if( ae_fp_less(smini,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
                {
                    if( ae_fp_greater(bnorm,bignum*smini) )
                    {
                        *scl = 1/bnorm;
                    }
                }
                temp = *scl/smini;
                x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1];
                x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1];
                *xnorm = temp*bnorm;
                *info = 1;
                return;
            }

            /*
             * Gaussian elimination with complete pivoting.
             * UR11/CR21/UR12/CR22 are the elements of C read off in
             * pivot order (column ICMAX of IPIVOT44); LR21 is the
             * elimination multiplier, UR22 the Schur complement.
             */
            ur11 = crv4->ptr.p_double[icmax];
            cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
            ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
            cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
            ur11r = 1/ur11;
            lr21 = ur11r*cr21;
            ur22 = cr22-ur12*lr21;

            /*
             * If smaller pivot < SMINI, use SMINI
             */
            if( ae_fp_less(ae_fabs(ur22, _state),smini) )
            {
                ur22 = smini;
                *info = 1;
            }

            /*
             * Permute the right-hand side to match the row pivoting
             */
            if( rswap4->ptr.p_bool[icmax] )
            {
                br1 = b->ptr.pp_double[2][1];
                br2 = b->ptr.pp_double[1][1];
            }
            else
            {
                br1 = b->ptr.pp_double[1][1];
                br2 = b->ptr.pp_double[2][1];
            }
            br2 = br2-lr21*br1;

            /*
             * Downscale the RHS if back-substitution could overflow
             */
            bbnd = ae_maxreal(ae_fabs(br1*(ur22*ur11r), _state), ae_fabs(br2, _state), _state);
            if( ae_fp_greater(bbnd,(double)(1))&&ae_fp_less(ae_fabs(ur22, _state),(double)(1)) )
            {
                if( ae_fp_greater_eq(bbnd,bignum*ae_fabs(ur22, _state)) )
                {
                    *scl = 1/bbnd;
                }
            }

            /*
             * Back-substitute, then undo the column pivoting
             */
            xr2 = br2*(*scl)/ur22;
            xr1 = *scl*br1*ur11r-xr2*(ur11r*ur12);
            if( zswap4->ptr.p_bool[icmax] )
            {
                x->ptr.pp_double[1][1] = xr2;
                x->ptr.pp_double[2][1] = xr1;
            }
            else
            {
                x->ptr.pp_double[1][1] = xr1;
                x->ptr.pp_double[2][1] = xr2;
            }
            *xnorm = ae_maxreal(ae_fabs(xr1, _state), ae_fabs(xr2, _state), _state);

            /*
             * Further scaling if  norm(A) norm(X) > overflow
             */
            if( ae_fp_greater(*xnorm,(double)(1))&&ae_fp_greater(cmax,(double)(1)) )
            {
                if( ae_fp_greater(*xnorm,bignum/cmax) )
                {
                    temp = cmax/bignum;
                    x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1];
                    x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1];
                    *xnorm = temp*(*xnorm);
                    *scl = temp*(*scl);
                }
            }
        }
        else
        {

            /*
             * Complex 2x2 system  (w is complex)
             *
             * Find the largest element in C
             * (imaginary part of C is diagonal: -wi*D; same packed
             * layout as CRV4)
             */
            civ4->ptr.p_double[1+0] = -wi*d1;
            civ4->ptr.p_double[2+0] = (double)(0);
            civ4->ptr.p_double[1+2] = (double)(0);
            civ4->ptr.p_double[2+2] = -wi*d2;
            cmax = (double)(0);
            icmax = 0;
            for(j=1; j<=4; j++)
            {
                if( ae_fp_greater(ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state),cmax) )
                {
                    cmax = ae_fabs(crv4->ptr.p_double[j], _state)+ae_fabs(civ4->ptr.p_double[j], _state);
                    icmax = j;
                }
            }

            /*
             * If norm(C) < SMINI, use SMINI*identity.
             */
            if( ae_fp_less(cmax,smini) )
            {
                bnorm = ae_maxreal(ae_fabs(b->ptr.pp_double[1][1], _state)+ae_fabs(b->ptr.pp_double[1][2], _state), ae_fabs(b->ptr.pp_double[2][1], _state)+ae_fabs(b->ptr.pp_double[2][2], _state), _state);
                if( ae_fp_less(smini,(double)(1))&&ae_fp_greater(bnorm,(double)(1)) )
                {
                    if( ae_fp_greater(bnorm,bignum*smini) )
                    {
                        *scl = 1/bnorm;
                    }
                }
                temp = *scl/smini;
                x->ptr.pp_double[1][1] = temp*b->ptr.pp_double[1][1];
                x->ptr.pp_double[2][1] = temp*b->ptr.pp_double[2][1];
                x->ptr.pp_double[1][2] = temp*b->ptr.pp_double[1][2];
                x->ptr.pp_double[2][2] = temp*b->ptr.pp_double[2][2];
                *xnorm = temp*bnorm;
                *info = 1;
                return;
            }

            /*
             * Gaussian elimination with complete pivoting.
             * (UR*/UI* = real/imaginary parts of the pivoted elements)
             */
            ur11 = crv4->ptr.p_double[icmax];
            ui11 = civ4->ptr.p_double[icmax];
            cr21 = crv4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
            ci21 = civ4->ptr.p_double[ipivot44->ptr.pp_int[2][icmax]];
            ur12 = crv4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
            ui12 = civ4->ptr.p_double[ipivot44->ptr.pp_int[3][icmax]];
            cr22 = crv4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
            ci22 = civ4->ptr.p_double[ipivot44->ptr.pp_int[4][icmax]];
            if( icmax==1||icmax==4 )
            {

                /*
                 * Code when off-diagonals of pivoted C are real
                 * (diagonal pivot: invert complex UR11+i*UI11 robustly,
                 * scaling by whichever component dominates)
                 */
                if( ae_fp_greater(ae_fabs(ur11, _state),ae_fabs(ui11, _state)) )
                {
                    temp = ui11/ur11;
                    ur11r = 1/(ur11*(1+ae_sqr(temp, _state)));
                    ui11r = -temp*ur11r;
                }
                else
                {
                    temp = ur11/ui11;
                    ui11r = -1/(ui11*(1+ae_sqr(temp, _state)));
                    ur11r = -temp*ui11r;
                }
                lr21 = cr21*ur11r;
                li21 = cr21*ui11r;
                ur12s = ur12*ur11r;
                ui12s = ur12*ui11r;
                ur22 = cr22-ur12*lr21;
                ui22 = ci22-ur12*li21;
            }
            else
            {

                /*
                 * Code when diagonals of pivoted C are real
                 * (off-diagonal pivot: UR11 is real, so its inverse is
                 * trivially real)
                 */
                ur11r = 1/ur11;
                ui11r = (double)(0);
                lr21 = cr21*ur11r;
                li21 = ci21*ur11r;
                ur12s = ur12*ur11r;
                ui12s = ui12*ur11r;
                ur22 = cr22-ur12*lr21+ui12*li21;
                ui22 = -ur12*li21-ui12*lr21;
            }
            u22abs = ae_fabs(ur22, _state)+ae_fabs(ui22, _state);

            /*
             * If smaller pivot < SMINI, use SMINI
             */
            if( ae_fp_less(u22abs,smini) )
            {
                ur22 = smini;
                ui22 = (double)(0);
                *info = 1;
            }

            /*
             * Permute the right-hand side to match the row pivoting
             */
            if( rswap4->ptr.p_bool[icmax] )
            {
                br2 = b->ptr.pp_double[1][1];
                br1 = b->ptr.pp_double[2][1];
                bi2 = b->ptr.pp_double[1][2];
                bi1 = b->ptr.pp_double[2][2];
            }
            else
            {
                br1 = b->ptr.pp_double[1][1];
                br2 = b->ptr.pp_double[2][1];
                bi1 = b->ptr.pp_double[1][2];
                bi2 = b->ptr.pp_double[2][2];
            }
            br2 = br2-lr21*br1+li21*bi1;
            bi2 = bi2-li21*br1-lr21*bi1;

            /*
             * Downscale the RHS if back-substitution could overflow
             */
            bbnd = ae_maxreal((ae_fabs(br1, _state)+ae_fabs(bi1, _state))*(u22abs*(ae_fabs(ur11r, _state)+ae_fabs(ui11r, _state))), ae_fabs(br2, _state)+ae_fabs(bi2, _state), _state);
            if( ae_fp_greater(bbnd,(double)(1))&&ae_fp_less(u22abs,(double)(1)) )
            {
                if( ae_fp_greater_eq(bbnd,bignum*u22abs) )
                {
                    *scl = 1/bbnd;
                    br1 = *scl*br1;
                    bi1 = *scl*bi1;
                    br2 = *scl*br2;
                    bi2 = *scl*bi2;
                }
            }

            /*
             * Back-substitute (robust complex division for the second
             * unknown), then undo the column pivoting
             */
            evd_internalhsevdladiv(br2, bi2, ur22, ui22, &xr2, &xi2, _state);
            xr1 = ur11r*br1-ui11r*bi1-ur12s*xr2+ui12s*xi2;
            xi1 = ui11r*br1+ur11r*bi1-ui12s*xr2-ur12s*xi2;
            if( zswap4->ptr.p_bool[icmax] )
            {
                x->ptr.pp_double[1][1] = xr2;
                x->ptr.pp_double[2][1] = xr1;
                x->ptr.pp_double[1][2] = xi2;
                x->ptr.pp_double[2][2] = xi1;
            }
            else
            {
                x->ptr.pp_double[1][1] = xr1;
                x->ptr.pp_double[2][1] = xr2;
                x->ptr.pp_double[1][2] = xi1;
                x->ptr.pp_double[2][2] = xi2;
            }
            *xnorm = ae_maxreal(ae_fabs(xr1, _state)+ae_fabs(xi1, _state), ae_fabs(xr2, _state)+ae_fabs(xi2, _state), _state);

            /*
             * Further scaling if  norm(A) norm(X) > overflow
             */
            if( ae_fp_greater(*xnorm,(double)(1))&&ae_fp_greater(cmax,(double)(1)) )
            {
                if( ae_fp_greater(*xnorm,bignum/cmax) )
                {
                    temp = cmax/bignum;
                    x->ptr.pp_double[1][1] = temp*x->ptr.pp_double[1][1];
                    x->ptr.pp_double[2][1] = temp*x->ptr.pp_double[2][1];
                    x->ptr.pp_double[1][2] = temp*x->ptr.pp_double[1][2];
                    x->ptr.pp_double[2][2] = temp*x->ptr.pp_double[2][2];
                    *xnorm = temp*(*xnorm);
                    *scl = temp*(*scl);
                }
            }
        }
    }
}
39763 
39764 
39765 /*************************************************************************
39766 performs complex division in  real arithmetic
39767 
39768                         a + i*b
39769              p + i*q = ---------
39770                         c + i*d
39771 
39772 The algorithm is due to Robert L. Smith and can be found
39773 in D. Knuth, The art of Computer Programming, Vol.2, p.195
39774 
39775   -- LAPACK auxiliary routine (version 3.0) --
39776      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
39777      Courant Institute, Argonne National Lab, and Rice University
39778      October 31, 1992
39779 *************************************************************************/
evd_internalhsevdladiv(double a,double b,double c,double d,double * p,double * q,ae_state * _state)39780 static void evd_internalhsevdladiv(double a,
39781      double b,
39782      double c,
39783      double d,
39784      double* p,
39785      double* q,
39786      ae_state *_state)
39787 {
39788     double e;
39789     double f;
39790 
39791     *p = 0;
39792     *q = 0;
39793 
39794     if( ae_fp_less(ae_fabs(d, _state),ae_fabs(c, _state)) )
39795     {
39796         e = d/c;
39797         f = c+d*e;
39798         *p = (a+b*e)/f;
39799         *q = (b-a*e)/f;
39800     }
39801     else
39802     {
39803         e = c/d;
39804         f = d+c*e;
39805         *p = (b+a*e)/f;
39806         *q = (-a+b*e)/f;
39807     }
39808 }
39809 
39810 
/*************************************************************************
Constructor for eigsubspacestate: puts every dynamically-allocated member
(RNG state, work vectors/matrices, rcomm state) into a valid empty state.
*************************************************************************/
void _eigsubspacestate_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacestate *state = (eigsubspacestate*)_p;
    ae_touch_ptr((void*)state);
    _hqrndstate_init(&state->rs, _state, make_automatic);
    ae_vector_init(&state->tau, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->q0, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->qcur, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->qnew, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->znew, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->r, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->rz, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->tz, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->rq, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->dummy, 0, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->rw, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->tw, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->wcur, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->wprev, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&state->wrank, 0, DT_REAL, _state, make_automatic);
    _apbuffers_init(&state->buf, _state, make_automatic);
    ae_matrix_init(&state->x, 0, 0, DT_REAL, _state, make_automatic);
    ae_matrix_init(&state->ax, 0, 0, DT_REAL, _state, make_automatic);
    _rcommstate_init(&state->rstate, _state, make_automatic);
}
39836 
39837 
/*************************************************************************
Copy-constructor for eigsubspacestate: deep-copies every member of *_src
into *_dst (scalars by assignment, containers via their init_copy).
*************************************************************************/
void _eigsubspacestate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacestate *to = (eigsubspacestate*)_dst;
    eigsubspacestate *from = (eigsubspacestate*)_src;
    to->n = from->n;
    to->k = from->k;
    to->nwork = from->nwork;
    to->maxits = from->maxits;
    to->eps = from->eps;
    to->eigenvectorsneeded = from->eigenvectorsneeded;
    to->matrixtype = from->matrixtype;
    to->usewarmstart = from->usewarmstart;
    to->firstcall = from->firstcall;
    _hqrndstate_init_copy(&to->rs, &from->rs, _state, make_automatic);
    to->running = from->running;
    ae_vector_init_copy(&to->tau, &from->tau, _state, make_automatic);
    ae_matrix_init_copy(&to->q0, &from->q0, _state, make_automatic);
    ae_matrix_init_copy(&to->qcur, &from->qcur, _state, make_automatic);
    ae_matrix_init_copy(&to->qnew, &from->qnew, _state, make_automatic);
    ae_matrix_init_copy(&to->znew, &from->znew, _state, make_automatic);
    ae_matrix_init_copy(&to->r, &from->r, _state, make_automatic);
    ae_matrix_init_copy(&to->rz, &from->rz, _state, make_automatic);
    ae_matrix_init_copy(&to->tz, &from->tz, _state, make_automatic);
    ae_matrix_init_copy(&to->rq, &from->rq, _state, make_automatic);
    ae_matrix_init_copy(&to->dummy, &from->dummy, _state, make_automatic);
    ae_vector_init_copy(&to->rw, &from->rw, _state, make_automatic);
    ae_vector_init_copy(&to->tw, &from->tw, _state, make_automatic);
    ae_vector_init_copy(&to->wcur, &from->wcur, _state, make_automatic);
    ae_vector_init_copy(&to->wprev, &from->wprev, _state, make_automatic);
    ae_vector_init_copy(&to->wrank, &from->wrank, _state, make_automatic);
    _apbuffers_init_copy(&to->buf, &from->buf, _state, make_automatic);
    ae_matrix_init_copy(&to->x, &from->x, _state, make_automatic);
    ae_matrix_init_copy(&to->ax, &from->ax, _state, make_automatic);
    to->requesttype = from->requesttype;
    to->requestsize = from->requestsize;
    to->repiterationscount = from->repiterationscount;
    _rcommstate_init_copy(&to->rstate, &from->rstate, _state, make_automatic);
}
39876 
39877 
_eigsubspacestate_clear(void * _p)39878 void _eigsubspacestate_clear(void* _p)
39879 {
39880     eigsubspacestate *p = (eigsubspacestate*)_p;
39881     ae_touch_ptr((void*)p);
39882     _hqrndstate_clear(&p->rs);
39883     ae_vector_clear(&p->tau);
39884     ae_matrix_clear(&p->q0);
39885     ae_matrix_clear(&p->qcur);
39886     ae_matrix_clear(&p->qnew);
39887     ae_matrix_clear(&p->znew);
39888     ae_matrix_clear(&p->r);
39889     ae_matrix_clear(&p->rz);
39890     ae_matrix_clear(&p->tz);
39891     ae_matrix_clear(&p->rq);
39892     ae_matrix_clear(&p->dummy);
39893     ae_vector_clear(&p->rw);
39894     ae_vector_clear(&p->tw);
39895     ae_vector_clear(&p->wcur);
39896     ae_vector_clear(&p->wprev);
39897     ae_vector_clear(&p->wrank);
39898     _apbuffers_clear(&p->buf);
39899     ae_matrix_clear(&p->x);
39900     ae_matrix_clear(&p->ax);
39901     _rcommstate_clear(&p->rstate);
39902 }
39903 
39904 
_eigsubspacestate_destroy(void * _p)39905 void _eigsubspacestate_destroy(void* _p)
39906 {
39907     eigsubspacestate *p = (eigsubspacestate*)_p;
39908     ae_touch_ptr((void*)p);
39909     _hqrndstate_destroy(&p->rs);
39910     ae_vector_destroy(&p->tau);
39911     ae_matrix_destroy(&p->q0);
39912     ae_matrix_destroy(&p->qcur);
39913     ae_matrix_destroy(&p->qnew);
39914     ae_matrix_destroy(&p->znew);
39915     ae_matrix_destroy(&p->r);
39916     ae_matrix_destroy(&p->rz);
39917     ae_matrix_destroy(&p->tz);
39918     ae_matrix_destroy(&p->rq);
39919     ae_matrix_destroy(&p->dummy);
39920     ae_vector_destroy(&p->rw);
39921     ae_vector_destroy(&p->tw);
39922     ae_vector_destroy(&p->wcur);
39923     ae_vector_destroy(&p->wprev);
39924     ae_vector_destroy(&p->wrank);
39925     _apbuffers_destroy(&p->buf);
39926     ae_matrix_destroy(&p->x);
39927     ae_matrix_destroy(&p->ax);
39928     _rcommstate_destroy(&p->rstate);
39929 }
39930 
39931 
/*************************************************************************
Constructor for eigsubspacereport (scalar-only struct: nothing to allocate).
*************************************************************************/
void _eigsubspacereport_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacereport *rep = (eigsubspacereport*)_p;
    ae_touch_ptr((void*)rep);
}
39937 
39938 
/*************************************************************************
Copy-constructor for eigsubspacereport: plain member-wise copy.
*************************************************************************/
void _eigsubspacereport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    eigsubspacereport *to = (eigsubspacereport*)_dst;
    eigsubspacereport *from = (eigsubspacereport*)_src;
    to->iterationscount = from->iterationscount;
}
39945 
39946 
_eigsubspacereport_clear(void * _p)39947 void _eigsubspacereport_clear(void* _p)
39948 {
39949     eigsubspacereport *p = (eigsubspacereport*)_p;
39950     ae_touch_ptr((void*)p);
39951 }
39952 
39953 
_eigsubspacereport_destroy(void * _p)39954 void _eigsubspacereport_destroy(void* _p)
39955 {
39956     eigsubspacereport *p = (eigsubspacereport*)_p;
39957     ae_touch_ptr((void*)p);
39958 }
39959 
39960 
39961 #endif
39962 #if defined(AE_COMPILE_DLU) || !defined(AE_PARTIAL_BUILD)
39963 
39964 
39965 /*************************************************************************
39966 Recurrent complex LU subroutine.
39967 Never call it directly.
39968 
39969   -- ALGLIB routine --
39970      04.01.2010
39971      Bochkanov Sergey
39972 *************************************************************************/
cmatrixluprec(ae_matrix * a,ae_int_t offs,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_vector * tmp,ae_state * _state)39973 void cmatrixluprec(/* Complex */ ae_matrix* a,
39974      ae_int_t offs,
39975      ae_int_t m,
39976      ae_int_t n,
39977      /* Integer */ ae_vector* pivots,
39978      /* Complex */ ae_vector* tmp,
39979      ae_state *_state)
39980 {
39981     ae_int_t i;
39982     ae_int_t m1;
39983     ae_int_t m2;
39984 
39985 
39986     if( ae_minint(m, n, _state)<=ablascomplexblocksize(a, _state) )
39987     {
39988         dlu_cmatrixlup2(a, offs, m, n, pivots, tmp, _state);
39989         return;
39990     }
39991     if( m>n )
39992     {
39993         cmatrixluprec(a, offs, n, n, pivots, tmp, _state);
39994         for(i=0; i<=n-1; i++)
39995         {
39996             ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n][offs+i], a->stride, "N", ae_v_len(0,m-n-1));
39997             ae_v_cmove(&a->ptr.pp_complex[offs+n][offs+i], a->stride, &a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+n,offs+m-1));
39998             ae_v_cmove(&a->ptr.pp_complex[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n,offs+m-1));
39999         }
40000         cmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state);
40001         return;
40002     }
40003     ablascomplexsplitlength(a, m, &m1, &m2, _state);
40004     cmatrixluprec(a, offs, m1, n, pivots, tmp, _state);
40005     if( m2>0 )
40006     {
40007         for(i=0; i<=m1-1; i++)
40008         {
40009             if( offs+i!=pivots->ptr.p_int[offs+i] )
40010             {
40011                 ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+m1][offs+i], a->stride, "N", ae_v_len(0,m2-1));
40012                 ae_v_cmove(&a->ptr.pp_complex[offs+m1][offs+i], a->stride, &a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, "N", ae_v_len(offs+m1,offs+m-1));
40013                 ae_v_cmove(&a->ptr.pp_complex[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m1,offs+m-1));
40014             }
40015         }
40016         cmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state);
40017         cmatrixgemm(m-m1, n-m1, m1, ae_complex_from_d(-1.0), a, offs+m1, offs, 0, a, offs, offs+m1, 0, ae_complex_from_d(1.0), a, offs+m1, offs+m1, _state);
40018         cmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state);
40019         for(i=0; i<=m2-1; i++)
40020         {
40021             if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] )
40022             {
40023                 ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+m1+i], a->stride, "N", ae_v_len(0,m1-1));
40024                 ae_v_cmove(&a->ptr.pp_complex[offs][offs+m1+i], a->stride, &a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, "N", ae_v_len(offs,offs+m1-1));
40025                 ae_v_cmove(&a->ptr.pp_complex[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m1-1));
40026             }
40027         }
40028     }
40029 }
40030 
40031 
/*************************************************************************
Recurrent real LU subroutine (LU with column pivoting).
Never call it directly.

A      -   array whose [offs..offs+m-1, offs..offs+n-1] block is factored
           in place
pivots -   pivot column indices, filled for [offs..offs+min(m,n)-1]
tmp    -   scratch vector (holds one matrix column during swaps)

  -- ALGLIB routine --
     04.01.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixluprec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t m1;
    ae_int_t m2;


    /*
     * Small problems are handled by the non-recursive level-2 kernel.
     */
    if( ae_minint(m, n, _state)<=ablasblocksize(a, _state) )
    {
        dlu_rmatrixlup2(a, offs, m, n, pivots, tmp, _state);
        return;
    }
    if( m>n )
    {
        /*
         * Tall matrix: factor the top NxN square first.
         */
        rmatrixluprec(a, offs, n, n, pivots, tmp, _state);
        for(i=0; i<=n-1; i++)
        {
            /*
             * Replay the column interchanges on rows offs+n..offs+m-1
             * (three-step swap through tmp, skipping trivial pivots).
             */
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n][offs+i], a->stride, ae_v_len(0,m-n-1));
                ae_v_move(&a->ptr.pp_double[offs+n][offs+i], a->stride, &a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+n,offs+m-1));
                ae_v_move(&a->ptr.pp_double[offs+n][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n,offs+m-1));
            }
        }
        /*
         * Compute the remaining rows of L via a right triangular solve.
         */
        rmatrixrighttrsm(m-n, n, a, offs, offs, ae_true, ae_true, 0, a, offs+n, offs, _state);
        return;
    }
    /*
     * General case: split rows M = M1+M2 and factor the top panel.
     */
    ablassplitlength(a, m, &m1, &m2, _state);
    rmatrixluprec(a, offs, m1, n, pivots, tmp, _state);
    if( m2>0 )
    {
        /*
         * Apply the top panel's column interchanges to the bottom panel.
         */
        for(i=0; i<=m1-1; i++)
        {
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+m1][offs+i], a->stride, ae_v_len(0,m2-1));
                ae_v_move(&a->ptr.pp_double[offs+m1][offs+i], a->stride, &a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, ae_v_len(offs+m1,offs+m-1));
                ae_v_move(&a->ptr.pp_double[offs+m1][pivots->ptr.p_int[offs+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m1,offs+m-1));
            }
        }
        /*
         * TRSM + GEMM form the Schur complement, which is then factored
         * recursively; finally the second factorization's interchanges
         * are replayed on the top-left M1-row strip.
         */
        rmatrixrighttrsm(m2, m1, a, offs, offs, ae_true, ae_true, 0, a, offs+m1, offs, _state);
        rmatrixgemm(m-m1, n-m1, m1, -1.0, a, offs+m1, offs, 0, a, offs, offs+m1, 0, 1.0, a, offs+m1, offs+m1, _state);
        rmatrixluprec(a, offs+m1, m-m1, n-m1, pivots, tmp, _state);
        for(i=0; i<=m2-1; i++)
        {
            if( offs+m1+i!=pivots->ptr.p_int[offs+m1+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+m1+i], a->stride, ae_v_len(0,m1-1));
                ae_v_move(&a->ptr.pp_double[offs][offs+m1+i], a->stride, &a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, ae_v_len(offs,offs+m1-1));
                ae_v_move(&a->ptr.pp_double[offs][pivots->ptr.p_int[offs+m1+i]], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m1-1));
            }
        }
    }
}
40100 
40101 
/*************************************************************************
Recurrent complex LU subroutine (LU with row pivoting).
Never call it directly.

A      -   array whose [offs..offs+m-1, offs..offs+n-1] block is factored
           in place
pivots -   pivot row indices, filled for [offs..offs+min(m,n)-1]
tmp    -   scratch vector (holds one matrix row during swaps)

  -- ALGLIB routine --
     04.01.2010
     Bochkanov Sergey
*************************************************************************/
void cmatrixplurec(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t tsa;
    ae_int_t tsb;


    /*
     * Tile sizes: tsa - small (L1-friendly; halved because complex
     * elements are twice as large as reals), tsb - large block size.
     */
    tsa = matrixtilesizea(_state)/2;
    tsb = matrixtilesizeb(_state);
    if( n<=tsa )
    {
        /*
         * Narrow panel: use the non-recursive level-2 kernel.
         */
        dlu_cmatrixplu2(a, offs, m, n, pivots, tmp, _state);
        return;
    }
    if( n>m )
    {
        /*
         * Wide matrix: factor the left MxM square, replay its row
         * interchanges on the right (N-M)-column part, then compute the
         * remaining columns of U via a left triangular solve.
         */
        cmatrixplurec(a, offs, m, m, pivots, tmp, _state);
        for(i=0; i<=m-1; i++)
        {
            /* NOTE(review): this swap runs even for trivial pivots
               (offs+i==pivots[offs+i]); harmless (self-swap is a no-op)
               but differs from the guarded loops below. */
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+m], 1, "N", ae_v_len(0,n-m-1));
            ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+m], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, "N", ae_v_len(offs+m,offs+n-1));
            ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+m,offs+n-1));
        }
        cmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state);
        return;
    }
    /*
     * Split columns N = N1+N2: a full large tile if possible, otherwise
     * a tile-aligned split.
     */
    if( n>tsb )
    {
        n1 = tsb;
        n2 = n-n1;
    }
    else
    {
        tiledsplit(n, tsa, &n1, &n2, _state);
    }
    /*
     * Factor the left panel, then update and factor the trailing part.
     */
    cmatrixplurec(a, offs, m, n1, pivots, tmp, _state);
    if( n2>0 )
    {
        /*
         * Apply the left panel's row interchanges to the right panel.
         */
        for(i=0; i<=n1-1; i++)
        {
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs+n1], 1, "N", ae_v_len(0,n2-1));
                ae_v_cmove(&a->ptr.pp_complex[offs+i][offs+n1], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, "N", ae_v_len(offs+n1,offs+n-1));
                ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs+n1,offs+n-1));
            }
        }
        /*
         * TRSM + GEMM form the Schur complement, which is factored
         * recursively; afterwards the second factorization's row
         * interchanges are replayed on the left N1-column strip.
         */
        cmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state);
        cmatrixgemm(m-n1, n-n1, n1, ae_complex_from_d(-1.0), a, offs+n1, offs, 0, a, offs, offs+n1, 0, ae_complex_from_d(1.0), a, offs+n1, offs+n1, _state);
        cmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state);
        for(i=0; i<=n2-1; i++)
        {
            if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] )
            {
                ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+n1+i][offs], 1, "N", ae_v_len(0,n1-1));
                ae_v_cmove(&a->ptr.pp_complex[offs+n1+i][offs], 1, &a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, "N", ae_v_len(offs,offs+n1-1));
                ae_v_cmove(&a->ptr.pp_complex[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+n1-1));
            }
        }
    }
}
40179 
40180 
/*************************************************************************
Recurrent real LU subroutine (LU with row pivoting).
Never call it directly.

A      -   array whose [offs..offs+m-1, offs..offs+n-1] block is factored
           in place
pivots -   pivot row indices, filled for [offs..offs+min(m,n)-1]
tmp    -   scratch vector (holds one matrix row during swaps)

  -- ALGLIB routine --
     04.01.2010
     Bochkanov Sergey
*************************************************************************/
void rmatrixplurec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t tsa;
    ae_int_t tsb;


    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    /*
     * Try the optional MKL fast path for small-to-medium panels; if the
     * MKL kernel handled the problem, we are done.
     */
    if( n<=tsb )
    {
        if( rmatrixplumkl(a, offs, m, n, pivots, _state) )
        {
            return;
        }
    }
    if( n<=tsa )
    {
        /*
         * Narrow panel: use the non-recursive level-2 kernel.
         */
        dlu_rmatrixplu2(a, offs, m, n, pivots, tmp, _state);
        return;
    }
    if( n>m )
    {
        /*
         * Wide matrix: factor the left MxM square, replay its row
         * interchanges on the right (N-M)-column part, then compute the
         * remaining columns of U via a left triangular solve.
         */
        rmatrixplurec(a, offs, m, m, pivots, tmp, _state);
        for(i=0; i<=m-1; i++)
        {
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+m], 1, ae_v_len(0,n-m-1));
            ae_v_move(&a->ptr.pp_double[offs+i][offs+m], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, ae_v_len(offs+m,offs+n-1));
            ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+m], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+m,offs+n-1));
        }
        rmatrixlefttrsm(m, n-m, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+m, _state);
        return;
    }
    /*
     * Split columns N = N1+N2: a full large tile if possible, otherwise
     * a tile-aligned split.
     */
    if( n>tsb )
    {
        n1 = tsb;
        n2 = n-n1;
    }
    else
    {
        tiledsplit(n, tsa, &n1, &n2, _state);
    }
    /*
     * Factor the left panel, then update and factor the trailing part.
     */
    rmatrixplurec(a, offs, m, n1, pivots, tmp, _state);
    if( n2>0 )
    {
        /*
         * Apply the left panel's row interchanges to the right panel
         * (three-step swap through tmp, skipping trivial pivots).
         */
        for(i=0; i<=n1-1; i++)
        {
            if( offs+i!=pivots->ptr.p_int[offs+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(0,n2-1));
                ae_v_move(&a->ptr.pp_double[offs+i][offs+n1], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, ae_v_len(offs+n1,offs+n-1));
                ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+i]][offs+n1], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs+n1,offs+n-1));
            }
        }
        /*
         * TRSM + GEMM form the Schur complement, which is factored
         * recursively; afterwards the second factorization's row
         * interchanges are replayed on the left N1-column strip.
         */
        rmatrixlefttrsm(n1, n2, a, offs, offs, ae_false, ae_true, 0, a, offs, offs+n1, _state);
        rmatrixgemm(m-n1, n-n1, n1, -1.0, a, offs+n1, offs, 0, a, offs, offs+n1, 0, 1.0, a, offs+n1, offs+n1, _state);
        rmatrixplurec(a, offs+n1, m-n1, n-n1, pivots, tmp, _state);
        for(i=0; i<=n2-1; i++)
        {
            if( offs+n1+i!=pivots->ptr.p_int[offs+n1+i] )
            {
                ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(0,n1-1));
                ae_v_move(&a->ptr.pp_double[offs+n1+i][offs], 1, &a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, ae_v_len(offs,offs+n1-1));
                ae_v_move(&a->ptr.pp_double[pivots->ptr.p_int[offs+n1+i]][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+n1-1));
            }
        }
    }
}
40265 
40266 
/*************************************************************************
Complex LUP kernel: non-recursive level-2 LU with column pivoting of the
[offs..offs+m-1, offs..offs+n-1] block; tmp needs at least m+n elements.

  -- ALGLIB routine --
     10.01.2010
     Bochkanov Sergey
*************************************************************************/
static void dlu_cmatrixlup2(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    ae_complex s;


    /* Degenerate sizes: nothing to factor. */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {
        /*
         * Find the column JP with the largest |A[J,*]| entry in
         * row J, among columns J..N-1.
         */
        jp = j;
        for(i=j+1; i<=n-1; i++)
        {
            if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+j][offs+i], _state),ae_c_abs(a->ptr.pp_complex[offs+j][offs+jp], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        if( jp!=j )
        {
            /*
             * Swap columns J and JP (three-step exchange through tmp).
             */
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+j], a->stride, "N", ae_v_len(0,m-1));
            ae_v_cmove(&a->ptr.pp_complex[offs][offs+j], a->stride, &a->ptr.pp_complex[offs][offs+jp], a->stride, "N", ae_v_len(offs,offs+m-1));
            ae_v_cmove(&a->ptr.pp_complex[offs][offs+jp], a->stride, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+m-1));
        }
        /*
         * Scale the rest of row J by 1/A[J,J] (skipped for a zero pivot).
         */
        if( ae_c_neq_d(a->ptr.pp_complex[offs+j][offs+j],(double)(0))&&j+1<=n-1 )
        {
            s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
            ae_v_cmulc(&a->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s);
        }
        /*
         * Rank-1 update of the trailing submatrix:
         * A[j+1.., j+1..] -= A[j+1..,j] * A[j,j+1..]
         * (column below the pivot in tmp[0..], negated row tail in tmp[m..]).
         */
        if( j<ae_minint(m-1, n-1, _state) )
        {
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2));
            ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2));
            cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40322 
40323 
/*************************************************************************
Real LUP kernel: non-recursive level-2 LU with column pivoting of the
[offs..offs+m-1, offs..offs+n-1] block; tmp needs at least m+n elements.

  -- ALGLIB routine --
     10.01.2010
     Bochkanov Sergey
*************************************************************************/
static void dlu_rmatrixlup2(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    double s;


    /* Degenerate sizes: nothing to factor. */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {
        /*
         * Find the column JP with the largest |A[J,*]| entry in
         * row J, among columns J..N-1.
         */
        jp = j;
        for(i=j+1; i<=n-1; i++)
        {
            if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+j][offs+i], _state),ae_fabs(a->ptr.pp_double[offs+j][offs+jp], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        if( jp!=j )
        {
            /*
             * Swap columns J and JP (three-step exchange through tmp).
             */
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+j], a->stride, ae_v_len(0,m-1));
            ae_v_move(&a->ptr.pp_double[offs][offs+j], a->stride, &a->ptr.pp_double[offs][offs+jp], a->stride, ae_v_len(offs,offs+m-1));
            ae_v_move(&a->ptr.pp_double[offs][offs+jp], a->stride, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+m-1));
        }
        /*
         * Scale the rest of row J by 1/A[J,J] (skipped for a zero pivot).
         */
        if( ae_fp_neq(a->ptr.pp_double[offs+j][offs+j],(double)(0))&&j+1<=n-1 )
        {
            s = 1/a->ptr.pp_double[offs+j][offs+j];
            ae_v_muld(&a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), s);
        }
        /*
         * Rank-1 update of the trailing submatrix:
         * A[j+1.., j+1..] -= A[j+1..,j] * A[j,j+1..]
         * (column below the pivot in tmp[0..], negated row tail in tmp[m..]).
         */
        if( j<ae_minint(m-1, n-1, _state) )
        {
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2));
            ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2));
            rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40379 
40380 
/*************************************************************************
Complex PLU kernel: non-recursive level-2 LU with row (partial) pivoting
of the [offs..offs+m-1, offs..offs+n-1] block; tmp needs >= m+n elements.

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     June 30, 1992
*************************************************************************/
static void dlu_cmatrixplu2(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    ae_complex s;


    /* Degenerate sizes: nothing to factor. */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {
        /*
         * Find the row JP with the largest |A[*,J]| entry in
         * column J, among rows J..M-1.
         */
        jp = j;
        for(i=j+1; i<=m-1; i++)
        {
            if( ae_fp_greater(ae_c_abs(a->ptr.pp_complex[offs+i][offs+j], _state),ae_c_abs(a->ptr.pp_complex[offs+jp][offs+j], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        /*
         * A zero pivot column is left untouched (singular case); the
         * rank-1 update below still runs with zero multipliers.
         */
        if( ae_c_neq_d(a->ptr.pp_complex[offs+jp][offs+j],(double)(0)) )
        {
            /*
             * Swap rows J and JP element by element.
             */
            if( jp!=j )
            {
                for(i=0; i<=n-1; i++)
                {
                    s = a->ptr.pp_complex[offs+j][offs+i];
                    a->ptr.pp_complex[offs+j][offs+i] = a->ptr.pp_complex[offs+jp][offs+i];
                    a->ptr.pp_complex[offs+jp][offs+i] = s;
                }
            }
            /*
             * Scale column J below the diagonal by 1/A[J,J]
             * (the multipliers of L).
             */
            if( j+1<=m-1 )
            {
                s = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
                ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s);
            }
        }
        /*
         * Rank-1 update of the trailing submatrix:
         * A[j+1.., j+1..] -= A[j+1..,j] * A[j,j+1..]
         * (multiplier column in tmp[0..], negated row tail in tmp[m..]).
         */
        if( j<ae_minint(m, n, _state)-1 )
        {
            ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(0,m-j-2));
            ae_v_cmoveneg(&tmp->ptr.p_complex[m], 1, &a->ptr.pp_complex[offs+j][offs+j+1], 1, "N", ae_v_len(m,m+n-j-2));
            cmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40443 
40444 
/*************************************************************************
Real PLU kernel

  -- LAPACK routine (version 3.0) --
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     June 30, 1992
*************************************************************************/
static void dlu_rmatrixplu2(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t m,
     ae_int_t n,
     /* Integer */ ae_vector* pivots,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    double s;


    /*
     * Unblocked real LU with partial (row) pivoting, applied in-place to
     * the MxN submatrix of A starting at [Offs,Offs].  Pivot indices are
     * stored into Pivots[Offs..Offs+min(M,N)-1].  Tmp is scratch:
     * positions [0,M-1) receive the current L-column and positions
     * [M,M+N-1) the negated U-row, so Tmp needs at least M+N elements -
     * presumably guaranteed by the caller; confirm there.
     */
    if( m==0||n==0 )
    {
        return;
    }
    for(j=0; j<=ae_minint(m-1, n-1, _state); j++)
    {

        /*
         * Select pivot row JP: the row with largest |A[i][j]| at or below
         * the diagonal of column J.
         */
        jp = j;
        for(i=j+1; i<=m-1; i++)
        {
            if( ae_fp_greater(ae_fabs(a->ptr.pp_double[offs+i][offs+j], _state),ae_fabs(a->ptr.pp_double[offs+jp][offs+j], _state)) )
            {
                jp = i;
            }
        }
        pivots->ptr.p_int[offs+j] = offs+jp;
        if( ae_fp_neq(a->ptr.pp_double[offs+jp][offs+j],(double)(0)) )
        {

            /*
             * Non-zero pivot: swap rows J and JP across all N columns of
             * the submatrix, then scale the subdiagonal part of column J
             * by 1/pivot to form the L-column.
             */
            if( jp!=j )
            {
                for(i=0; i<=n-1; i++)
                {
                    s = a->ptr.pp_double[offs+j][offs+i];
                    a->ptr.pp_double[offs+j][offs+i] = a->ptr.pp_double[offs+jp][offs+i];
                    a->ptr.pp_double[offs+jp][offs+i] = s;
                }
            }
            if( j+1<=m-1 )
            {
                s = 1/a->ptr.pp_double[offs+j][offs+j];
                ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+m-1), s);
            }
        }

        /*
         * Rank-1 update of the trailing (M-J-1)x(N-J-1) submatrix:
         * A[j+1:,j+1:] += L-column * (-U-row).  NOTE: when the pivot is
         * exactly zero the whole column below the diagonal is zero as
         * well (JP maximizes the absolute value), so this update is a
         * no-op in that case.
         */
        if( j<ae_minint(m, n, _state)-1 )
        {
            ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(0,m-j-2));
            ae_v_moveneg(&tmp->ptr.p_double[m], 1, &a->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(m,m+n-j-2));
            rmatrixrank1(m-j-1, n-j-1, a, offs+j+1, offs+j+1, tmp, 0, tmp, m, _state);
        }
    }
}
40507 
40508 
40509 #endif
40510 #if defined(AE_COMPILE_SPTRF) || !defined(AE_PARTIAL_BUILD)
40511 
40512 
/*************************************************************************
Sparse LU for square NxN CRS matrix with both row and column permutations.

Represents A as Pr*L*U*Pc, where:
* Pr is a product of row permutations Pr=Pr(0)*Pr(1)*...*Pr(n-2)*Pr(n-1)
* Pc is a product of col permutations Pc=Pc(n-1)*Pc(n-2)*...*Pc(1)*Pc(0)
* L is lower unitriangular
* U is upper triangular

INPUT PARAMETERS:
    A           -   sparse square matrix in CRS format
    PivotType   -   pivot type:
                    * 0 - for best pivoting available
                    * 1 - row-only pivoting
                    * 2 - row and column greedy pivoting algorithm (most
                          sparse pivot column is selected from the trailing
                          matrix at each step)
    Buf         -   temporary buffer, previously allocated memory is
                    reused as much as possible

OUTPUT PARAMETERS:
    A           -   LU decomposition of A
    PR          -   array[N], row pivots
    PC          -   array[N], column pivots
    Buf         -   following fields of Buf are set:
                    * Buf.RowPermRawIdx[] - contains row permutation, with
                      RawIdx[I]=J meaning that J-th row  of  the  original
                      input matrix was moved to Ith position of the output
                      factorization

This function always succeeds  i.e. it ALWAYS returns valid factorization,
but for your convenience it also  returns boolean  value  which  helps  to
detect symbolically degenerate matrix:
* function returns TRUE if the matrix was factorized AND symbolically
  non-degenerate
* function returns FALSE if the matrix was factorized but U has strictly
  zero elements at the diagonal (the factorization is returned anyway).

  -- ALGLIB routine --
     15.01.2019
     Bochkanov Sergey
*************************************************************************/
ae_bool sptrflu(sparsematrix* a,
     ae_int_t pivottype,
     /* Integer */ ae_vector* pr,
     /* Integer */ ae_vector* pc,
     sluv2buffer* buf,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t k;
    ae_int_t i;
    ae_int_t j;
    ae_int_t jp;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t ibest;
    ae_int_t jbest;
    double v;
    double v0;
    ae_int_t nz0;
    ae_int_t nz1;
    double uu;
    ae_int_t offs;
    ae_int_t tmpndense;
    ae_bool densificationsupported;
    ae_int_t densifyabove;
    ae_bool result;


    ae_assert(sparseiscrs(a, _state), "SparseLU: A is not stored in CRS format", _state);
    ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseLU: non-square A", _state);
    ae_assert((pivottype==0||pivottype==1)||pivottype==2, "SparseLU: unexpected pivot type", _state);
    result = ae_true;
    n = sparsegetnrows(a, _state);
    if( pivottype==0 )
    {
        pivottype = 2;
    }
    densificationsupported = pivottype==2;

    /*
     * Initialize buffer size and raw row permutation: RowPermRawIdx[]
     * starts as the identity and is updated on every row swap performed
     * below, so that in the end RowPermRawIdx[I]=J means that J-th input
     * row ended up at position I of the factorization.
     */
    buf->n = n;
    ivectorsetlengthatleast(&buf->rowpermrawidx, n, _state);
    for(i=0; i<=n-1; i++)
    {
        buf->rowpermrawidx.ptr.p_int[i] = i;
    }

    /*
     * Allocate storage for sparse L and U factors
     *
     * NOTE: SparseMatrix structure for these factors is only
     *       partially initialized; we use it just as a temporary
     *       storage and do not intend to use facilities of the
     *       'sparse' subpackage to work with these objects.
     */
    buf->sparsel.matrixtype = 1;
    buf->sparsel.m = n;
    buf->sparsel.n = n;
    ivectorsetlengthatleast(&buf->sparsel.ridx, n+1, _state);
    buf->sparsel.ridx.ptr.p_int[0] = 0;
    buf->sparseut.matrixtype = 1;
    buf->sparseut.m = n;
    buf->sparseut.n = n;
    ivectorsetlengthatleast(&buf->sparseut.ridx, n+1, _state);
    buf->sparseut.ridx.ptr.p_int[0] = 0;

    /*
     * Allocate unprocessed yet part of the matrix,
     * two submatrices:
     * * BU, upper J rows of columns [J,N), upper submatrix
     * * BL, left J  cols of rows [J,N), left submatrix
     * * B1, (N-J)*(N-J) square submatrix
     */
    sptrf_sluv2list1init(n, &buf->bleft, _state);
    sptrf_sluv2list1init(n, &buf->bupper, _state);
    ivectorsetlengthatleast(pr, n, _state);
    ivectorsetlengthatleast(pc, n, _state);
    ivectorsetlengthatleast(&buf->v0i, n, _state);
    ivectorsetlengthatleast(&buf->v1i, n, _state);
    rvectorsetlengthatleast(&buf->v0r, n, _state);
    rvectorsetlengthatleast(&buf->v1r, n, _state);
    sptrf_sparsetrailinit(a, &buf->strail, _state);

    /*
     * Prepare dense trail, initial densification: columns of the input
     * which are already denser than DensifyAbove are moved to dense
     * storage up-front (only when pivoting mode supports densification).
     */
    sptrf_densetrailinit(&buf->dtrail, n, _state);
    densifyabove = ae_round(sptrf_densebnd*n, _state)+1;
    if( densificationsupported )
    {
        for(i=0; i<=n-1; i++)
        {
            if( buf->strail.nzc.ptr.p_int[i]>densifyabove )
            {
                sptrf_sparsetraildensify(&buf->strail, i, &buf->bupper, &buf->dtrail, _state);
            }
        }
    }

    /*
     * Process sparse part
     */
    for(k=0; k<=n-1; k++)
    {

        /*
         * Find pivot column and pivot row
         */
        if( !sptrf_sparsetrailfindpivot(&buf->strail, pivottype, &ibest, &jbest, _state) )
        {

            /*
             * Only densified columns are left, break sparse iteration
             */
            ae_assert(buf->dtrail.ndense+k==n, "SPTRF: integrity check failed (35741)", _state);
            break;
        }
        pc->ptr.p_int[k] = jbest;
        pr->ptr.p_int[k] = ibest;
        j = buf->rowpermrawidx.ptr.p_int[k];
        buf->rowpermrawidx.ptr.p_int[k] = buf->rowpermrawidx.ptr.p_int[ibest];
        buf->rowpermrawidx.ptr.p_int[ibest] = j;

        /*
         * Apply pivoting to BL and BU
         */
        sptrf_sluv2list1swap(&buf->bleft, k, ibest, _state);
        sptrf_sluv2list1swap(&buf->bupper, k, jbest, _state);

        /*
         * Apply pivoting to sparse trail, pivot out: UU receives the
         * pivot value, (V0I,V0R,NZ0) the scaled L-column and
         * (V1I,V1R,NZ1) the U-row.
         */
        sptrf_sparsetrailpivotout(&buf->strail, ibest, jbest, &uu, &buf->v0i, &buf->v0r, &nz0, &buf->v1i, &buf->v1r, &nz1, _state);
        result = result&&uu!=0;

        /*
         * Pivot dense trail (swap rows K and IBest of the dense columns)
         */
        tmpndense = buf->dtrail.ndense;
        for(i=0; i<=tmpndense-1; i++)
        {
            v = buf->dtrail.d.ptr.pp_double[k][i];
            buf->dtrail.d.ptr.pp_double[k][i] = buf->dtrail.d.ptr.pp_double[ibest][i];
            buf->dtrail.d.ptr.pp_double[ibest][i] = v;
        }

        /*
         * Output to LU matrix: K-th row of U^T (with diagonal UU) and
         * K-th row of strictly-lower L.
         */
        sptrf_sluv2list1appendsequencetomatrix(&buf->bupper, k, ae_true, uu, n, &buf->sparseut, k, _state);
        sptrf_sluv2list1appendsequencetomatrix(&buf->bleft, k, ae_false, 0.0, n, &buf->sparsel, k, _state);

        /*
         * Extract K-th col/row of B1, generate K-th col/row of BL/BU, update NZC
         */
        sptrf_sluv2list1pushsparsevector(&buf->bleft, &buf->v0i, &buf->v0r, nz0, _state);
        sptrf_sluv2list1pushsparsevector(&buf->bupper, &buf->v1i, &buf->v1r, nz1, _state);

        /*
         * Update the rest of the matrix
         */
        if( nz0*(nz1+buf->dtrail.ndense)>0 )
        {

            /*
             * Update dense trail
             *
             * NOTE: this update MUST be performed before we update sparse trail,
             *       because sparse update may move columns to dense storage after
             *       update is performed on them. Thus, we have to avoid applying
             *       same update twice.
             */
            if( buf->dtrail.ndense>0 )
            {
                tmpndense = buf->dtrail.ndense;
                for(i=0; i<=nz0-1; i++)
                {
                    i0 = buf->v0i.ptr.p_int[i];
                    v0 = buf->v0r.ptr.p_double[i];
                    for(j=0; j<=tmpndense-1; j++)
                    {
                        buf->dtrail.d.ptr.pp_double[i0][j] = buf->dtrail.d.ptr.pp_double[i0][j]-v0*buf->dtrail.d.ptr.pp_double[k][j];
                    }
                }
            }

            /*
             * Update sparse trail
             */
            sptrf_sparsetrailupdate(&buf->strail, &buf->v0i, &buf->v0r, nz0, &buf->v1i, &buf->v1r, nz1, &buf->bupper, &buf->dtrail, densificationsupported, _state);
        }
    }

    /*
     * Process densified trail
     */
    if( buf->dtrail.ndense>0 )
    {
        tmpndense = buf->dtrail.ndense;

        /*
         * Generate column pivots to bring actual order of columns in the
         * working part of the matrix to one used for dense storage
         */
        for(i=n-tmpndense; i<=n-1; i++)
        {
            k = buf->dtrail.did.ptr.p_int[i-(n-tmpndense)];
            jp = -1;
            for(j=i; j<=n-1; j++)
            {
                if( buf->strail.colid.ptr.p_int[j]==k )
                {
                    jp = j;
                    break;
                }
            }
            ae_assert(jp>=0, "SPTRF: integrity check failed during reordering", _state);
            k = buf->strail.colid.ptr.p_int[i];
            buf->strail.colid.ptr.p_int[i] = buf->strail.colid.ptr.p_int[jp];
            buf->strail.colid.ptr.p_int[jp] = k;
            pc->ptr.p_int[i] = jp;
        }

        /*
         * Perform dense LU decomposition on dense trail
         */
        rmatrixsetlengthatleast(&buf->dbuf, buf->dtrail.ndense, buf->dtrail.ndense, _state);
        for(i=0; i<=tmpndense-1; i++)
        {
            for(j=0; j<=tmpndense-1; j++)
            {
                buf->dbuf.ptr.pp_double[i][j] = buf->dtrail.d.ptr.pp_double[i+(n-tmpndense)][j];
            }
        }
        rvectorsetlengthatleast(&buf->tmp0, 2*n, _state);
        ivectorsetlengthatleast(&buf->tmpp, n, _state);
        rmatrixplurec(&buf->dbuf, 0, tmpndense, tmpndense, &buf->tmpp, &buf->tmp0, _state);

        /*
         * Convert indexes of rows pivots, swap elements of BLeft
         */
        for(i=0; i<=tmpndense-1; i++)
        {
            pr->ptr.p_int[i+(n-tmpndense)] = buf->tmpp.ptr.p_int[i]+(n-tmpndense);
            sptrf_sluv2list1swap(&buf->bleft, i+(n-tmpndense), pr->ptr.p_int[i+(n-tmpndense)], _state);
            j = buf->rowpermrawidx.ptr.p_int[i+(n-tmpndense)];
            buf->rowpermrawidx.ptr.p_int[i+(n-tmpndense)] = buf->rowpermrawidx.ptr.p_int[pr->ptr.p_int[i+(n-tmpndense)]];
            buf->rowpermrawidx.ptr.p_int[pr->ptr.p_int[i+(n-tmpndense)]] = j;
        }

        /*
         * Convert U-factor: each dense column J becomes a row of the
         * (transposed) SparseUT storage - leading sparse-stage entries
         * first, then strictly-upper dense entries, then the diagonal
         * element (ALWAYS stored, even when zero).
         */
        ivectorgrowto(&buf->sparseut.idx, buf->sparseut.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        rvectorgrowto(&buf->sparseut.vals, buf->sparseut.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        for(j=0; j<=tmpndense-1; j++)
        {
            offs = buf->sparseut.ridx.ptr.p_int[j+(n-tmpndense)];
            k = n-tmpndense;

            /*
             * Convert leading N-NDense columns
             */
            for(i=0; i<=k-1; i++)
            {
                v = buf->dtrail.d.ptr.pp_double[i][j];
                if( v!=0 )
                {
                    buf->sparseut.idx.ptr.p_int[offs] = i;
                    buf->sparseut.vals.ptr.p_double[offs] = v;
                    offs = offs+1;
                }
            }

            /*
             * Convert upper diagonal elements
             */
            for(i=0; i<=j-1; i++)
            {
                v = buf->dbuf.ptr.pp_double[i][j];
                if( v!=0 )
                {
                    buf->sparseut.idx.ptr.p_int[offs] = i+(n-tmpndense);
                    buf->sparseut.vals.ptr.p_double[offs] = v;
                    offs = offs+1;
                }
            }

            /*
             * Convert diagonal element (always stored); a zero diagonal
             * marks the factorization as symbolically degenerate.
             */
            v = buf->dbuf.ptr.pp_double[j][j];
            buf->sparseut.idx.ptr.p_int[offs] = j+(n-tmpndense);
            buf->sparseut.vals.ptr.p_double[offs] = v;
            offs = offs+1;
            result = result&&v!=0;

            /*
             * Column is done
             */
            buf->sparseut.ridx.ptr.p_int[j+(n-tmpndense)+1] = offs;
        }

        /*
         * Convert L-factor: sparse-stage entries of each row first
         * (via BLeft), then strictly-lower entries of the dense block.
         */
        ivectorgrowto(&buf->sparsel.idx, buf->sparsel.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        rvectorgrowto(&buf->sparsel.vals, buf->sparsel.ridx.ptr.p_int[n-tmpndense]+n*tmpndense, _state);
        for(i=0; i<=tmpndense-1; i++)
        {
            sptrf_sluv2list1appendsequencetomatrix(&buf->bleft, i+(n-tmpndense), ae_false, 0.0, n, &buf->sparsel, i+(n-tmpndense), _state);
            offs = buf->sparsel.ridx.ptr.p_int[i+(n-tmpndense)+1];
            for(j=0; j<=i-1; j++)
            {
                v = buf->dbuf.ptr.pp_double[i][j];
                if( v!=0 )
                {
                    buf->sparsel.idx.ptr.p_int[offs] = j+(n-tmpndense);
                    buf->sparsel.vals.ptr.p_double[offs] = v;
                    offs = offs+1;
                }
            }
            buf->sparsel.ridx.ptr.p_int[i+(n-tmpndense)+1] = offs;
        }
    }

    /*
     * Allocate output: merge strictly-lower L (stored by rows) and
     * upper U (stored by columns in SparseUT) into a single CRS matrix.
     * TmpI[] first counts per-row L entries, then U contributions are
     * added by scanning SparseUT.
     */
    ivectorsetlengthatleast(&buf->tmpi, n, _state);
    for(i=0; i<=n-1; i++)
    {
        buf->tmpi.ptr.p_int[i] = buf->sparsel.ridx.ptr.p_int[i+1]-buf->sparsel.ridx.ptr.p_int[i];
    }
    for(i=0; i<=n-1; i++)
    {
        i0 = buf->sparseut.ridx.ptr.p_int[i];
        i1 = buf->sparseut.ridx.ptr.p_int[i+1]-1;
        for(j=i0; j<=i1; j++)
        {
            k = buf->sparseut.idx.ptr.p_int[j];
            buf->tmpi.ptr.p_int[k] = buf->tmpi.ptr.p_int[k]+1;
        }
    }
    a->matrixtype = 1;
    a->ninitialized = buf->sparsel.ridx.ptr.p_int[n]+buf->sparseut.ridx.ptr.p_int[n];
    a->m = n;
    a->n = n;
    ivectorsetlengthatleast(&a->ridx, n+1, _state);
    ivectorsetlengthatleast(&a->idx, a->ninitialized, _state);
    rvectorsetlengthatleast(&a->vals, a->ninitialized, _state);
    a->ridx.ptr.p_int[0] = 0;
    for(i=0; i<=n-1; i++)
    {
        a->ridx.ptr.p_int[i+1] = a->ridx.ptr.p_int[i]+buf->tmpi.ptr.p_int[i];
    }

    /*
     * Copy L entries to the head of each output row; TmpI[] is reset to
     * per-row L counts for the DIdx/UIdx computation below.
     */
    for(i=0; i<=n-1; i++)
    {
        i0 = buf->sparsel.ridx.ptr.p_int[i];
        i1 = buf->sparsel.ridx.ptr.p_int[i+1]-1;
        jp = a->ridx.ptr.p_int[i];
        for(j=i0; j<=i1; j++)
        {
            a->idx.ptr.p_int[jp+(j-i0)] = buf->sparsel.idx.ptr.p_int[j];
            a->vals.ptr.p_double[jp+(j-i0)] = buf->sparsel.vals.ptr.p_double[j];
        }
        buf->tmpi.ptr.p_int[i] = buf->sparsel.ridx.ptr.p_int[i+1]-buf->sparsel.ridx.ptr.p_int[i];
    }

    /*
     * DIdx/UIdx point right after the L-part of each row; TmpI[] now
     * serves as per-row write cursor for the U scatter below.
     */
    ivectorsetlengthatleast(&a->didx, n, _state);
    ivectorsetlengthatleast(&a->uidx, n, _state);
    for(i=0; i<=n-1; i++)
    {
        a->didx.ptr.p_int[i] = a->ridx.ptr.p_int[i]+buf->tmpi.ptr.p_int[i];
        a->uidx.ptr.p_int[i] = a->didx.ptr.p_int[i]+1;
        buf->tmpi.ptr.p_int[i] = a->didx.ptr.p_int[i];
    }

    /*
     * Scatter U: SparseUT row I holds column I of U, so entry K of that
     * row goes to output row K (ascending I guarantees ascending column
     * order within each output row's U-part).
     */
    for(i=0; i<=n-1; i++)
    {
        i0 = buf->sparseut.ridx.ptr.p_int[i];
        i1 = buf->sparseut.ridx.ptr.p_int[i+1]-1;
        for(j=i0; j<=i1; j++)
        {
            k = buf->sparseut.idx.ptr.p_int[j];
            offs = buf->tmpi.ptr.p_int[k];
            a->idx.ptr.p_int[offs] = i;
            a->vals.ptr.p_double[offs] = buf->sparseut.vals.ptr.p_double[j];
            buf->tmpi.ptr.p_int[k] = offs+1;
        }
    }
    return result;
}
40948 
40949 
40950 /*************************************************************************
40951 This function initialized rectangular submatrix structure.
40952 
40953 After initialization this structure stores  matrix[N,0],  which contains N
40954 rows (sequences), stored as single-linked lists.
40955 
40956   -- ALGLIB routine --
40957      15.01.2019
40958      Bochkanov Sergey
40959 *************************************************************************/
sptrf_sluv2list1init(ae_int_t n,sluv2list1matrix * a,ae_state * _state)40960 static void sptrf_sluv2list1init(ae_int_t n,
40961      sluv2list1matrix* a,
40962      ae_state *_state)
40963 {
40964     ae_int_t i;
40965 
40966 
40967     ae_assert(n>=1, "SLUV2List1Init: N<1", _state);
40968     a->nfixed = n;
40969     a->ndynamic = 0;
40970     a->nallocated = n;
40971     a->nused = 0;
40972     ivectorgrowto(&a->idxfirst, n, _state);
40973     ivectorgrowto(&a->strgidx, 2*a->nallocated, _state);
40974     rvectorgrowto(&a->strgval, a->nallocated, _state);
40975     for(i=0; i<=n-1; i++)
40976     {
40977         a->idxfirst.ptr.p_int[i] = -1;
40978     }
40979 }
40980 
40981 
40982 /*************************************************************************
40983 This function swaps sequences #I and #J stored by the structure
40984 
40985   -- ALGLIB routine --
40986      15.01.2019
40987      Bochkanov Sergey
40988 *************************************************************************/
sptrf_sluv2list1swap(sluv2list1matrix * a,ae_int_t i,ae_int_t j,ae_state * _state)40989 static void sptrf_sluv2list1swap(sluv2list1matrix* a,
40990      ae_int_t i,
40991      ae_int_t j,
40992      ae_state *_state)
40993 {
40994     ae_int_t k;
40995 
40996 
40997     k = a->idxfirst.ptr.p_int[i];
40998     a->idxfirst.ptr.p_int[i] = a->idxfirst.ptr.p_int[j];
40999     a->idxfirst.ptr.p_int[j] = k;
41000 }
41001 
41002 
41003 /*************************************************************************
41004 This function drops sequence #I from the structure
41005 
41006   -- ALGLIB routine --
41007      15.01.2019
41008      Bochkanov Sergey
41009 *************************************************************************/
sptrf_sluv2list1dropsequence(sluv2list1matrix * a,ae_int_t i,ae_state * _state)41010 static void sptrf_sluv2list1dropsequence(sluv2list1matrix* a,
41011      ae_int_t i,
41012      ae_state *_state)
41013 {
41014 
41015 
41016     a->idxfirst.ptr.p_int[i] = -1;
41017 }
41018 
41019 
/*************************************************************************
This function appends sequence from the structure to the sparse matrix.

It is assumed that S is a lower triangular  matrix,  and A stores strictly
lower triangular elements (no diagonal ones!). You can explicitly  control
whether you want to add diagonal elements or not.

Output matrix is assumed to be stored in CRS format and  to  be  partially
initialized (up to, but not including, Dst-th row). DIdx and UIdx are  NOT
updated by this function as well as NInitialized.

INPUT PARAMETERS:
    A           -   rectangular matrix structure
    Src         -   sequence (row or column) index in the structure
    HasDiagonal -   whether we want to add diagonal element
    D           -   diagonal element, if HasDiagonal=True
    NZMAX       -   maximum estimated number of non-zeros in the row,
                    this function will preallocate storage in the output
                    matrix.
    S           -   destination matrix in CRS format, partially initialized
    Dst         -   destination row index


  -- ALGLIB routine --
     15.01.2019
     Bochkanov Sergey
*************************************************************************/
static void sptrf_sluv2list1appendsequencetomatrix(sluv2list1matrix* a,
     ae_int_t src,
     ae_bool hasdiagonal,
     double d,
     ae_int_t nzmax,
     sparsematrix* s,
     ae_int_t dst,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t jp;
    ae_int_t nnz;


    /*
     * Reserve NZMax entries starting at I0 (beginning of row Dst);
     * the actual element count NNZ may be smaller.
     */
    i0 = s->ridx.ptr.p_int[dst];
    ivectorgrowto(&s->idx, i0+nzmax, _state);
    rvectorgrowto(&s->vals, i0+nzmax, _state);

    /*
     * Fill the reserved region BACKWARDS from its end, so that elements
     * come out in the order they were originally pushed (the linked list
     * is stored most-recent-first).  The diagonal element, if requested,
     * is written at the very end of the row.
     */
    if( hasdiagonal )
    {
        i1 = i0+nzmax-1;
        s->idx.ptr.p_int[i1] = dst;
        s->vals.ptr.p_double[i1] = d;
        nnz = 1;
    }
    else
    {
        i1 = i0+nzmax;
        nnz = 0;
    }
    jp = a->idxfirst.ptr.p_int[src];
    while(jp>=0)
    {

        /*
         * Cell layout: StrgIdx[2*jp+0] = next cell, StrgIdx[2*jp+1] =
         * column index, StrgVal[jp] = value.
         */
        i1 = i1-1;
        s->idx.ptr.p_int[i1] = a->strgidx.ptr.p_int[2*jp+1];
        s->vals.ptr.p_double[i1] = a->strgval.ptr.p_double[jp];
        nnz = nnz+1;
        jp = a->strgidx.ptr.p_int[2*jp+0];
    }

    /*
     * Compact: shift the NNZ elements (currently at the tail of the
     * reserved region, starting at I1) down to the row start I0.
     */
    for(i=0; i<=nnz-1; i++)
    {
        s->idx.ptr.p_int[i0+i] = s->idx.ptr.p_int[i1+i];
        s->vals.ptr.p_double[i0+i] = s->vals.ptr.p_double[i1+i];
    }
    s->ridx.ptr.p_int[dst+1] = s->ridx.ptr.p_int[dst]+nnz;
}
41094 
41095 
41096 /*************************************************************************
41097 This function appends sparse column to the  matrix,  increasing  its  size
41098 from [N,K] to [N,K+1]
41099 
41100   -- ALGLIB routine --
41101      15.01.2019
41102      Bochkanov Sergey
41103 *************************************************************************/
sptrf_sluv2list1pushsparsevector(sluv2list1matrix * a,ae_vector * si,ae_vector * sv,ae_int_t nz,ae_state * _state)41104 static void sptrf_sluv2list1pushsparsevector(sluv2list1matrix* a,
41105      /* Integer */ ae_vector* si,
41106      /* Real    */ ae_vector* sv,
41107      ae_int_t nz,
41108      ae_state *_state)
41109 {
41110     ae_int_t idx;
41111     ae_int_t i;
41112     ae_int_t k;
41113     ae_int_t nused;
41114     double v;
41115 
41116 
41117 
41118     /*
41119      * Fetch matrix size, increase
41120      */
41121     k = a->ndynamic;
41122     ae_assert(k<a->nfixed, "Assertion failed", _state);
41123     a->ndynamic = k+1;
41124 
41125     /*
41126      * Allocate new storage if needed
41127      */
41128     nused = a->nused;
41129     a->nallocated = ae_maxint(a->nallocated, nused+nz, _state);
41130     ivectorgrowto(&a->strgidx, 2*a->nallocated, _state);
41131     rvectorgrowto(&a->strgval, a->nallocated, _state);
41132 
41133     /*
41134      * Append to list
41135      */
41136     for(idx=0; idx<=nz-1; idx++)
41137     {
41138         i = si->ptr.p_int[idx];
41139         v = sv->ptr.p_double[idx];
41140         a->strgidx.ptr.p_int[2*nused+0] = a->idxfirst.ptr.p_int[i];
41141         a->strgidx.ptr.p_int[2*nused+1] = k;
41142         a->strgval.ptr.p_double[nused] = v;
41143         a->idxfirst.ptr.p_int[i] = nused;
41144         nused = nused+1;
41145     }
41146     a->nused = nused;
41147 }
41148 
41149 
41150 /*************************************************************************
41151 This function initializes dense trail, by default it is matrix[N,0]
41152 
41153   -- ALGLIB routine --
41154      15.01.2019
41155      Bochkanov Sergey
41156 *************************************************************************/
sptrf_densetrailinit(sluv2densetrail * d,ae_int_t n,ae_state * _state)41157 static void sptrf_densetrailinit(sluv2densetrail* d,
41158      ae_int_t n,
41159      ae_state *_state)
41160 {
41161     ae_int_t excessivesize;
41162 
41163 
41164 
41165     /*
41166      * Note: excessive rows are allocated to accomodate for situation when
41167      *       this buffer is used to solve successive problems with increasing
41168      *       sizes.
41169      */
41170     excessivesize = ae_maxint(ae_round(1.333*n, _state), n, _state);
41171     d->n = n;
41172     d->ndense = 0;
41173     ivectorsetlengthatleast(&d->did, n, _state);
41174     if( d->d.rows<=excessivesize )
41175     {
41176         rmatrixsetlengthatleast(&d->d, n, 1, _state);
41177     }
41178     else
41179     {
41180         ae_matrix_set_length(&d->d, excessivesize, 1, _state);
41181     }
41182 }
41183 
41184 
41185 /*************************************************************************
41186 This function appends column with id=ID to the dense trail (column IDs are
41187 integer numbers in [0,N) which can be used to track column permutations).
41188 
41189   -- ALGLIB routine --
41190      15.01.2019
41191      Bochkanov Sergey
41192 *************************************************************************/
sptrf_densetrailappendcolumn(sluv2densetrail * d,ae_vector * x,ae_int_t id,ae_state * _state)41193 static void sptrf_densetrailappendcolumn(sluv2densetrail* d,
41194      /* Real    */ ae_vector* x,
41195      ae_int_t id,
41196      ae_state *_state)
41197 {
41198     ae_int_t n;
41199     ae_int_t i;
41200     ae_int_t targetidx;
41201 
41202 
41203     n = d->n;
41204 
41205     /*
41206      * Reallocate storage
41207      */
41208     rmatrixgrowcolsto(&d->d, d->ndense+1, n, _state);
41209 
41210     /*
41211      * Copy to dense storage:
41212      * * BUpper
41213      * * BTrail
41214      * Remove from sparse storage
41215      */
41216     targetidx = d->ndense;
41217     for(i=0; i<=n-1; i++)
41218     {
41219         d->d.ptr.pp_double[i][targetidx] = x->ptr.p_double[i];
41220     }
41221     d->did.ptr.p_int[targetidx] = id;
41222     d->ndense = targetidx+1;
41223 }
41224 
41225 
41226 /*************************************************************************
41227 This function initializes sparse trail from the sparse matrix. By default,
41228 sparse trail spans columns and rows in [0,N)  range.  Subsequent  pivoting
41229 out of rows/columns changes its range to [K,N), [K+1,N) and so on.
41230 
41231   -- ALGLIB routine --
41232      15.01.2019
41233      Bochkanov Sergey
41234 *************************************************************************/
sptrf_sparsetrailinit(sparsematrix * s,sluv2sparsetrail * a,ae_state * _state)41235 static void sptrf_sparsetrailinit(sparsematrix* s,
41236      sluv2sparsetrail* a,
41237      ae_state *_state)
41238 {
41239     ae_int_t i;
41240     ae_int_t j;
41241     ae_int_t n;
41242     ae_int_t j0;
41243     ae_int_t j1;
41244     ae_int_t jj;
41245     ae_int_t p;
41246     ae_int_t slsused;
41247 
41248 
41249     ae_assert(s->m==s->n, "SparseTrailInit: M<>N", _state);
41250     ae_assert(s->matrixtype==1, "SparseTrailInit: non-CRS input", _state);
41251     n = s->n;
41252     a->n = s->n;
41253     a->k = 0;
41254     ivectorsetlengthatleast(&a->nzc, n, _state);
41255     ivectorsetlengthatleast(&a->colid, n, _state);
41256     rvectorsetlengthatleast(&a->tmp0, n, _state);
41257     for(i=0; i<=n-1; i++)
41258     {
41259         a->colid.ptr.p_int[i] = i;
41260     }
41261     bvectorsetlengthatleast(&a->isdensified, n, _state);
41262     for(i=0; i<=n-1; i++)
41263     {
41264         a->isdensified.ptr.p_bool[i] = ae_false;
41265     }
41266 
41267     /*
41268      * Working set of columns
41269      */
41270     a->maxwrkcnt = iboundval(ae_round(1+(double)n/(double)3, _state), 1, ae_minint(n, 50, _state), _state);
41271     a->wrkcnt = 0;
41272     ivectorsetlengthatleast(&a->wrkset, a->maxwrkcnt, _state);
41273 
41274     /*
41275      * Sparse linked storage (SLS). Store CRS matrix to SLS format,
41276      * row by row, starting from the last one.
41277      */
41278     ivectorsetlengthatleast(&a->slscolptr, n, _state);
41279     ivectorsetlengthatleast(&a->slsrowptr, n, _state);
41280     ivectorsetlengthatleast(&a->slsidx, s->ridx.ptr.p_int[n]*sptrf_slswidth, _state);
41281     rvectorsetlengthatleast(&a->slsval, s->ridx.ptr.p_int[n], _state);
41282     for(i=0; i<=n-1; i++)
41283     {
41284         a->nzc.ptr.p_int[i] = 0;
41285     }
41286     for(i=0; i<=n-1; i++)
41287     {
41288         a->slscolptr.ptr.p_int[i] = -1;
41289         a->slsrowptr.ptr.p_int[i] = -1;
41290     }
41291     slsused = 0;
41292     for(i=n-1; i>=0; i--)
41293     {
41294         j0 = s->ridx.ptr.p_int[i];
41295         j1 = s->ridx.ptr.p_int[i+1]-1;
41296         for(jj=j1; jj>=j0; jj--)
41297         {
41298             j = s->idx.ptr.p_int[jj];
41299 
41300             /*
41301              * Update non-zero counts for columns
41302              */
41303             a->nzc.ptr.p_int[j] = a->nzc.ptr.p_int[j]+1;
41304 
41305             /*
41306              * Insert into column list
41307              */
41308             p = a->slscolptr.ptr.p_int[j];
41309             if( p>=0 )
41310             {
41311                 a->slsidx.ptr.p_int[p*sptrf_slswidth+0] = slsused;
41312             }
41313             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+0] = -1;
41314             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+1] = p;
41315             a->slscolptr.ptr.p_int[j] = slsused;
41316 
41317             /*
41318              * Insert into row list
41319              */
41320             p = a->slsrowptr.ptr.p_int[i];
41321             if( p>=0 )
41322             {
41323                 a->slsidx.ptr.p_int[p*sptrf_slswidth+2] = slsused;
41324             }
41325             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+2] = -1;
41326             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+3] = p;
41327             a->slsrowptr.ptr.p_int[i] = slsused;
41328 
41329             /*
41330              * Store index and value
41331              */
41332             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+4] = i;
41333             a->slsidx.ptr.p_int[slsused*sptrf_slswidth+5] = j;
41334             a->slsval.ptr.p_double[slsused] = s->vals.ptr.p_double[jj];
41335             slsused = slsused+1;
41336         }
41337     }
41338     a->slsused = slsused;
41339 }
41340 
41341 
41342 /*************************************************************************
This function searches for an appropriate pivot column/row.
41344 
41345 If there exists non-densified column, it returns indexes of  pivot  column
41346 and row, with most sparse column selected for column pivoting, and largest
41347 element selected for row pivoting. Function result is True.
41348 
41349 PivotType=1 means that no column pivoting is performed
41350 PivotType=2 means that both column and row pivoting are supported
41351 
41352 If all columns were densified, False is returned.
41353 
41354   -- ALGLIB routine --
41355      15.01.2019
41356      Bochkanov Sergey
41357 *************************************************************************/
sptrf_sparsetrailfindpivot(sluv2sparsetrail * a,ae_int_t pivottype,ae_int_t * ipiv,ae_int_t * jpiv,ae_state * _state)41358 static ae_bool sptrf_sparsetrailfindpivot(sluv2sparsetrail* a,
41359      ae_int_t pivottype,
41360      ae_int_t* ipiv,
41361      ae_int_t* jpiv,
41362      ae_state *_state)
41363 {
41364     ae_int_t n;
41365     ae_int_t k;
41366     ae_int_t j;
41367     ae_int_t jp;
41368     ae_int_t entry;
41369     ae_int_t nz;
41370     ae_int_t maxwrknz;
41371     ae_int_t nnzbest;
41372     double s;
41373     double bbest;
41374     ae_int_t wrk0;
41375     ae_int_t wrk1;
41376     ae_bool result;
41377 
41378     *ipiv = 0;
41379     *jpiv = 0;
41380 
41381     n = a->n;
41382     k = a->k;
41383     nnzbest = n+1;
41384     *jpiv = -1;
41385     *ipiv = -1;
41386     result = ae_true;
41387 
41388     /*
41389      * Select pivot column
41390      */
41391     if( pivottype==1 )
41392     {
41393 
41394         /*
41395          * No column pivoting
41396          */
41397         ae_assert(!a->isdensified.ptr.p_bool[k], "SparseTrailFindPivot: integrity check failed", _state);
41398         *jpiv = k;
41399     }
41400     else
41401     {
41402 
41403         /*
41404          * Find pivot column
41405          */
41406         for(;;)
41407         {
41408 
41409             /*
41410              * Scan working set (if non-empty) for good columns
41411              */
41412             maxwrknz = a->maxwrknz;
41413             for(j=0; j<=a->wrkcnt-1; j++)
41414             {
41415                 jp = a->wrkset.ptr.p_int[j];
41416                 if( jp<k )
41417                 {
41418                     continue;
41419                 }
41420                 if( a->isdensified.ptr.p_bool[jp] )
41421                 {
41422                     continue;
41423                 }
41424                 nz = a->nzc.ptr.p_int[jp];
41425                 if( nz>maxwrknz )
41426                 {
41427                     continue;
41428                 }
41429                 if( *jpiv<0||nz<nnzbest )
41430                 {
41431                     nnzbest = nz;
41432                     *jpiv = jp;
41433                 }
41434             }
41435             if( *jpiv>=0 )
41436             {
41437                 break;
41438             }
41439 
41440             /*
41441              * Well, nothing found. Recompute working set:
41442              * * determine most sparse unprocessed yet column
41443              * * gather all columns with density in [Wrk0,Wrk1) range,
41444              *   increase range, repeat, until working set is full
41445              */
41446             a->wrkcnt = 0;
41447             a->maxwrknz = 0;
41448             wrk0 = n+1;
41449             for(jp=k; jp<=n-1; jp++)
41450             {
41451                 if( !a->isdensified.ptr.p_bool[jp]&&a->nzc.ptr.p_int[jp]<wrk0 )
41452                 {
41453                     wrk0 = a->nzc.ptr.p_int[jp];
41454                 }
41455             }
41456             if( wrk0>n )
41457             {
41458 
41459                 /*
41460                  * Only densified columns are present, exit.
41461                  */
41462                 result = ae_false;
41463                 return result;
41464             }
41465             wrk1 = wrk0+1;
41466             while(a->wrkcnt<a->maxwrkcnt&&wrk0<=n)
41467             {
41468 
41469                 /*
41470                  * Find columns with non-zero count in [Wrk0,Wrk1) range
41471                  */
41472                 for(jp=k; jp<=n-1; jp++)
41473                 {
41474                     if( a->wrkcnt==a->maxwrkcnt )
41475                     {
41476                         break;
41477                     }
41478                     if( a->isdensified.ptr.p_bool[jp] )
41479                     {
41480                         continue;
41481                     }
41482                     if( a->nzc.ptr.p_int[jp]>=wrk0&&a->nzc.ptr.p_int[jp]<wrk1 )
41483                     {
41484                         a->wrkset.ptr.p_int[a->wrkcnt] = jp;
41485                         a->wrkcnt = a->wrkcnt+1;
41486                         a->maxwrknz = ae_maxint(a->maxwrknz, a->nzc.ptr.p_int[jp], _state);
41487                     }
41488                 }
41489 
41490                 /*
41491                  * Advance scan range
41492                  */
41493                 jp = ae_round(1.41*(wrk1-wrk0), _state)+1;
41494                 wrk0 = wrk1;
41495                 wrk1 = wrk0+jp;
41496             }
41497         }
41498     }
41499 
41500     /*
41501      * Select pivot row
41502      */
41503     bbest = (double)(0);
41504     entry = a->slscolptr.ptr.p_int[*jpiv];
41505     while(entry>=0)
41506     {
41507         s = ae_fabs(a->slsval.ptr.p_double[entry], _state);
41508         if( *ipiv<0||ae_fp_greater(s,bbest) )
41509         {
41510             bbest = s;
41511             *ipiv = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
41512         }
41513         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41514     }
41515     if( *ipiv<0 )
41516     {
41517         *ipiv = k;
41518     }
41519     return result;
41520 }
41521 
41522 
41523 /*************************************************************************
41524 This function pivots out specified row and column.
41525 
41526 Sparse trail range changes from [K,N) to [K+1,N).
41527 
41528 V0I, V0R, V1I, V1R must be preallocated arrays[N].
41529 
41530 Following data are returned:
41531 * UU - diagonal element (pivoted out), can be zero
41532 * V0I, V0R, NZ0 - sparse column pivoted out to the left (after permutation
41533   is applied to its elements) and divided by UU.
41534   V0I is array[NZ0] which stores row indexes in [K+1,N) range, V0R  stores
41535   values.
41536 * V1I, V1R, NZ1 - sparse row pivoted out to the top.
41537 
41538   -- ALGLIB routine --
41539      15.01.2019
41540      Bochkanov Sergey
41541 *************************************************************************/
sptrf_sparsetrailpivotout(sluv2sparsetrail * a,ae_int_t ipiv,ae_int_t jpiv,double * uu,ae_vector * v0i,ae_vector * v0r,ae_int_t * nz0,ae_vector * v1i,ae_vector * v1r,ae_int_t * nz1,ae_state * _state)41542 static void sptrf_sparsetrailpivotout(sluv2sparsetrail* a,
41543      ae_int_t ipiv,
41544      ae_int_t jpiv,
41545      double* uu,
41546      /* Integer */ ae_vector* v0i,
41547      /* Real    */ ae_vector* v0r,
41548      ae_int_t* nz0,
41549      /* Integer */ ae_vector* v1i,
41550      /* Real    */ ae_vector* v1r,
41551      ae_int_t* nz1,
41552      ae_state *_state)
41553 {
41554     ae_int_t n;
41555     ae_int_t k;
41556     ae_int_t i;
41557     ae_int_t j;
41558     ae_int_t entry;
41559     double v;
41560     double s;
41561     ae_bool vb;
41562     ae_int_t pos0k;
41563     ae_int_t pos0piv;
41564     ae_int_t pprev;
41565     ae_int_t pnext;
41566     ae_int_t pnextnext;
41567 
41568     *uu = 0;
41569     *nz0 = 0;
41570     *nz1 = 0;
41571 
41572     n = a->n;
41573     k = a->k;
41574     ae_assert(k<n, "SparseTrailPivotOut: integrity check failed", _state);
41575 
41576     /*
41577      * Pivot out column JPiv from the sparse linked storage:
41578      * * remove column JPiv from the matrix
41579      * * update column K:
41580      *   * change element indexes after it is permuted to JPiv
41581      *   * resort rows affected by move K->JPiv
41582      *
41583      * NOTE: this code leaves V0I/V0R/NZ0 in the unfinalized state,
41584      *       i.e. these arrays do not account for pivoting performed
41585      *       on rows. They will be post-processed later.
41586      */
41587     *nz0 = 0;
41588     pos0k = -1;
41589     pos0piv = -1;
41590     entry = a->slscolptr.ptr.p_int[jpiv];
41591     while(entry>=0)
41592     {
41593 
41594         /*
41595          * Offload element
41596          */
41597         i = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
41598         v0i->ptr.p_int[*nz0] = i;
41599         v0r->ptr.p_double[*nz0] = a->slsval.ptr.p_double[entry];
41600         if( i==k )
41601         {
41602             pos0k = *nz0;
41603         }
41604         if( i==ipiv )
41605         {
41606             pos0piv = *nz0;
41607         }
41608         *nz0 = *nz0+1;
41609 
41610         /*
41611          * Remove element from the row list
41612          */
41613         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+2];
41614         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41615         if( pprev>=0 )
41616         {
41617             a->slsidx.ptr.p_int[pprev*sptrf_slswidth+3] = pnext;
41618         }
41619         else
41620         {
41621             a->slsrowptr.ptr.p_int[i] = pnext;
41622         }
41623         if( pnext>=0 )
41624         {
41625             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+2] = pprev;
41626         }
41627 
41628         /*
41629          * Select next entry
41630          */
41631         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41632     }
41633     entry = a->slscolptr.ptr.p_int[k];
41634     a->slscolptr.ptr.p_int[jpiv] = entry;
41635     while(entry>=0)
41636     {
41637 
41638         /*
41639          * Change column index
41640          */
41641         a->slsidx.ptr.p_int[entry*sptrf_slswidth+5] = jpiv;
41642 
41643         /*
41644          * Next entry
41645          */
41646         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41647     }
41648 
41649     /*
41650      * Post-process V0, account for pivoting.
41651      * Compute diagonal element UU.
41652      */
41653     *uu = (double)(0);
41654     if( pos0k>=0||pos0piv>=0 )
41655     {
41656 
41657         /*
41658          * Apply permutation to rows of pivoted out column, specific
41659          * implementation depends on the sparsity at locations #Pos0K
41660          * and #Pos0Piv of the V0 array.
41661          */
41662         if( pos0k>=0&&pos0piv>=0 )
41663         {
41664 
41665             /*
41666              * Obtain diagonal element
41667              */
41668             *uu = v0r->ptr.p_double[pos0piv];
41669             if( *uu!=0 )
41670             {
41671                 s = 1/(*uu);
41672             }
41673             else
41674             {
41675                 s = (double)(1);
41676             }
41677 
41678             /*
41679              * Move pivoted out element, shift array by one in order
41680              * to remove heading diagonal element (not needed here
41681              * anymore).
41682              */
41683             v0r->ptr.p_double[pos0piv] = v0r->ptr.p_double[pos0k];
41684             for(i=0; i<=*nz0-2; i++)
41685             {
41686                 v0i->ptr.p_int[i] = v0i->ptr.p_int[i+1];
41687                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i+1]*s;
41688             }
41689             *nz0 = *nz0-1;
41690         }
41691         if( pos0k>=0&&pos0piv<0 )
41692         {
41693 
41694             /*
41695              * Diagonal element is zero
41696              */
41697             *uu = (double)(0);
41698 
41699             /*
41700              * Pivot out element, reorder array
41701              */
41702             v0i->ptr.p_int[pos0k] = ipiv;
41703             for(i=pos0k; i<=*nz0-2; i++)
41704             {
41705                 if( v0i->ptr.p_int[i]<v0i->ptr.p_int[i+1] )
41706                 {
41707                     break;
41708                 }
41709                 j = v0i->ptr.p_int[i];
41710                 v0i->ptr.p_int[i] = v0i->ptr.p_int[i+1];
41711                 v0i->ptr.p_int[i+1] = j;
41712                 v = v0r->ptr.p_double[i];
41713                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i+1];
41714                 v0r->ptr.p_double[i+1] = v;
41715             }
41716         }
41717         if( pos0k<0&&pos0piv>=0 )
41718         {
41719 
41720             /*
41721              * Get diagonal element
41722              */
41723             *uu = v0r->ptr.p_double[pos0piv];
41724             if( *uu!=0 )
41725             {
41726                 s = 1/(*uu);
41727             }
41728             else
41729             {
41730                 s = (double)(1);
41731             }
41732 
41733             /*
41734              * Shift array past the pivoted in element by one
41735              * in order to remove pivot
41736              */
41737             for(i=0; i<=pos0piv-1; i++)
41738             {
41739                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i]*s;
41740             }
41741             for(i=pos0piv; i<=*nz0-2; i++)
41742             {
41743                 v0i->ptr.p_int[i] = v0i->ptr.p_int[i+1];
41744                 v0r->ptr.p_double[i] = v0r->ptr.p_double[i+1]*s;
41745             }
41746             *nz0 = *nz0-1;
41747         }
41748     }
41749 
41750     /*
41751      * Pivot out row IPiv from the sparse linked storage:
41752      * * remove row IPiv from the matrix
41753      * * reindex elements of row K after it is permuted to IPiv
41754      * * apply permutation to the cols of the pivoted out row,
41755      *   resort columns
41756      */
41757     *nz1 = 0;
41758     entry = a->slsrowptr.ptr.p_int[ipiv];
41759     while(entry>=0)
41760     {
41761 
41762         /*
41763          * Offload element
41764          */
41765         j = a->slsidx.ptr.p_int[entry*sptrf_slswidth+5];
41766         v1i->ptr.p_int[*nz1] = j;
41767         v1r->ptr.p_double[*nz1] = a->slsval.ptr.p_double[entry];
41768         *nz1 = *nz1+1;
41769 
41770         /*
41771          * Remove element from the column list
41772          */
41773         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+0];
41774         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41775         if( pprev>=0 )
41776         {
41777             a->slsidx.ptr.p_int[pprev*sptrf_slswidth+1] = pnext;
41778         }
41779         else
41780         {
41781             a->slscolptr.ptr.p_int[j] = pnext;
41782         }
41783         if( pnext>=0 )
41784         {
41785             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+0] = pprev;
41786         }
41787 
41788         /*
41789          * Select next entry
41790          */
41791         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41792     }
41793     a->slsrowptr.ptr.p_int[ipiv] = a->slsrowptr.ptr.p_int[k];
41794     entry = a->slsrowptr.ptr.p_int[ipiv];
41795     while(entry>=0)
41796     {
41797 
41798         /*
41799          * Change row index
41800          */
41801         a->slsidx.ptr.p_int[entry*sptrf_slswidth+4] = ipiv;
41802 
41803         /*
41804          * Resort column affected by row pivoting
41805          */
41806         j = a->slsidx.ptr.p_int[entry*sptrf_slswidth+5];
41807         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+0];
41808         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41809         while(pnext>=0&&a->slsidx.ptr.p_int[pnext*sptrf_slswidth+4]<ipiv)
41810         {
41811             pnextnext = a->slsidx.ptr.p_int[pnext*sptrf_slswidth+1];
41812 
41813             /*
41814              * prev->next
41815              */
41816             if( pprev>=0 )
41817             {
41818                 a->slsidx.ptr.p_int[pprev*sptrf_slswidth+1] = pnext;
41819             }
41820             else
41821             {
41822                 a->slscolptr.ptr.p_int[j] = pnext;
41823             }
41824 
41825             /*
41826              * entry->prev, entry->next
41827              */
41828             a->slsidx.ptr.p_int[entry*sptrf_slswidth+0] = pnext;
41829             a->slsidx.ptr.p_int[entry*sptrf_slswidth+1] = pnextnext;
41830 
41831             /*
41832              * next->prev, next->next
41833              */
41834             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+0] = pprev;
41835             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+1] = entry;
41836 
41837             /*
41838              * nextnext->prev
41839              */
41840             if( pnextnext>=0 )
41841             {
41842                 a->slsidx.ptr.p_int[pnextnext*sptrf_slswidth+0] = entry;
41843             }
41844 
41845             /*
41846              * PPrev, Item, PNext
41847              */
41848             pprev = pnext;
41849             pnext = pnextnext;
41850         }
41851 
41852         /*
41853          * Next entry
41854          */
41855         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41856     }
41857 
41858     /*
41859      * Reorder other structures
41860      */
41861     i = a->nzc.ptr.p_int[k];
41862     a->nzc.ptr.p_int[k] = a->nzc.ptr.p_int[jpiv];
41863     a->nzc.ptr.p_int[jpiv] = i;
41864     i = a->colid.ptr.p_int[k];
41865     a->colid.ptr.p_int[k] = a->colid.ptr.p_int[jpiv];
41866     a->colid.ptr.p_int[jpiv] = i;
41867     vb = a->isdensified.ptr.p_bool[k];
41868     a->isdensified.ptr.p_bool[k] = a->isdensified.ptr.p_bool[jpiv];
41869     a->isdensified.ptr.p_bool[jpiv] = vb;
41870 
41871     /*
41872      * Handle removal of col/row #K
41873      */
41874     for(i=0; i<=*nz1-1; i++)
41875     {
41876         j = v1i->ptr.p_int[i];
41877         a->nzc.ptr.p_int[j] = a->nzc.ptr.p_int[j]-1;
41878     }
41879     a->k = a->k+1;
41880 }
41881 
41882 
41883 /*************************************************************************
41884 This function densifies I1-th column of the sparse trail.
41885 
41886 PARAMETERS:
41887     A           -   sparse trail
41888     I1          -   column index
41889     BUpper      -   upper rectangular submatrix, updated during densification
41890                     of the columns (densified columns are removed)
41891     DTrail      -   dense trail, receives densified columns from sparse
41892                     trail and BUpper
41893 
41894   -- ALGLIB routine --
41895      15.01.2019
41896      Bochkanov Sergey
41897 *************************************************************************/
sptrf_sparsetraildensify(sluv2sparsetrail * a,ae_int_t i1,sluv2list1matrix * bupper,sluv2densetrail * dtrail,ae_state * _state)41898 static void sptrf_sparsetraildensify(sluv2sparsetrail* a,
41899      ae_int_t i1,
41900      sluv2list1matrix* bupper,
41901      sluv2densetrail* dtrail,
41902      ae_state *_state)
41903 {
41904     ae_int_t n;
41905     ae_int_t k;
41906     ae_int_t i;
41907     ae_int_t jp;
41908     ae_int_t entry;
41909     ae_int_t pprev;
41910     ae_int_t pnext;
41911 
41912 
41913     n = a->n;
41914     k = a->k;
41915     ae_assert(k<n, "SparseTrailDensify: integrity check failed", _state);
41916     ae_assert(k<=i1, "SparseTrailDensify: integrity check failed", _state);
41917     ae_assert(!a->isdensified.ptr.p_bool[i1], "SparseTrailDensify: integrity check failed", _state);
41918 
41919     /*
41920      * Offload items [0,K) of densified column from BUpper
41921      */
41922     for(i=0; i<=n-1; i++)
41923     {
41924         a->tmp0.ptr.p_double[i] = (double)(0);
41925     }
41926     jp = bupper->idxfirst.ptr.p_int[i1];
41927     while(jp>=0)
41928     {
41929         a->tmp0.ptr.p_double[bupper->strgidx.ptr.p_int[2*jp+1]] = bupper->strgval.ptr.p_double[jp];
41930         jp = bupper->strgidx.ptr.p_int[2*jp+0];
41931     }
41932     sptrf_sluv2list1dropsequence(bupper, i1, _state);
41933 
41934     /*
41935      * Offload items [K,N) of densified column from BLeft
41936      */
41937     entry = a->slscolptr.ptr.p_int[i1];
41938     while(entry>=0)
41939     {
41940 
41941         /*
41942          * Offload element
41943          */
41944         i = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
41945         a->tmp0.ptr.p_double[i] = a->slsval.ptr.p_double[entry];
41946 
41947         /*
41948          * Remove element from the row list
41949          */
41950         pprev = a->slsidx.ptr.p_int[entry*sptrf_slswidth+2];
41951         pnext = a->slsidx.ptr.p_int[entry*sptrf_slswidth+3];
41952         if( pprev>=0 )
41953         {
41954             a->slsidx.ptr.p_int[pprev*sptrf_slswidth+3] = pnext;
41955         }
41956         else
41957         {
41958             a->slsrowptr.ptr.p_int[i] = pnext;
41959         }
41960         if( pnext>=0 )
41961         {
41962             a->slsidx.ptr.p_int[pnext*sptrf_slswidth+2] = pprev;
41963         }
41964 
41965         /*
41966          * Select next entry
41967          */
41968         entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
41969     }
41970 
41971     /*
41972      * Densify
41973      */
41974     a->nzc.ptr.p_int[i1] = 0;
41975     a->isdensified.ptr.p_bool[i1] = ae_true;
41976     a->slscolptr.ptr.p_int[i1] = -1;
41977     sptrf_densetrailappendcolumn(dtrail, &a->tmp0, a->colid.ptr.p_int[i1], _state);
41978 }
41979 
41980 
41981 /*************************************************************************
41982 This function appends rank-1 update to the sparse trail.  Dense  trail  is
41983 not  updated  here,  but  we  may  move some columns to dense trail during
41984 update (i.e. densify them). Thus, you have to update  dense  trail  BEFORE
41985 you start updating sparse one (otherwise, recently densified columns  will
41986 be updated twice).
41987 
41988 PARAMETERS:
41989     A           -   sparse trail
41990     V0I, V0R    -   update column returned by SparseTrailPivotOut (MUST be
41991                     array[N] independently of the NZ0).
41992     NZ0         -   non-zero count for update column
41993     V1I, V1R    -   update row returned by SparseTrailPivotOut
41994     NZ1         -   non-zero count for update row
41995     BUpper      -   upper rectangular submatrix, updated during densification
41996                     of the columns (densified columns are removed)
41997     DTrail      -   dense trail, receives densified columns from sparse
41998                     trail and BUpper
41999     DensificationSupported- if False, no densification is performed
42000 
42001   -- ALGLIB routine --
42002      15.01.2019
42003      Bochkanov Sergey
42004 *************************************************************************/
sptrf_sparsetrailupdate(sluv2sparsetrail * a,ae_vector * v0i,ae_vector * v0r,ae_int_t nz0,ae_vector * v1i,ae_vector * v1r,ae_int_t nz1,sluv2list1matrix * bupper,sluv2densetrail * dtrail,ae_bool densificationsupported,ae_state * _state)42005 static void sptrf_sparsetrailupdate(sluv2sparsetrail* a,
42006      /* Integer */ ae_vector* v0i,
42007      /* Real    */ ae_vector* v0r,
42008      ae_int_t nz0,
42009      /* Integer */ ae_vector* v1i,
42010      /* Real    */ ae_vector* v1r,
42011      ae_int_t nz1,
42012      sluv2list1matrix* bupper,
42013      sluv2densetrail* dtrail,
42014      ae_bool densificationsupported,
42015      ae_state *_state)
42016 {
42017     ae_int_t n;
42018     ae_int_t k;
42019     ae_int_t i;
42020     ae_int_t j;
42021     ae_int_t i0;
42022     ae_int_t i1;
42023     double v1;
42024     ae_int_t densifyabove;
42025     ae_int_t nnz;
42026     ae_int_t entry;
42027     ae_int_t newentry;
42028     ae_int_t pprev;
42029     ae_int_t pnext;
42030     ae_int_t p;
42031     ae_int_t nexti;
42032     ae_int_t newoffs;
42033 
42034 
42035     n = a->n;
42036     k = a->k;
42037     ae_assert(k<n, "SparseTrailPivotOut: integrity check failed", _state);
42038     densifyabove = ae_round(sptrf_densebnd*(n-k), _state)+1;
42039     ae_assert(v0i->cnt>=nz0+1, "SparseTrailUpdate: integrity check failed", _state);
42040     ae_assert(v0r->cnt>=nz0+1, "SparseTrailUpdate: integrity check failed", _state);
42041     v0i->ptr.p_int[nz0] = -1;
42042     v0r->ptr.p_double[nz0] = (double)(0);
42043 
42044     /*
42045      * Update sparse representation
42046      */
42047     ivectorgrowto(&a->slsidx, (a->slsused+nz0*nz1)*sptrf_slswidth, _state);
42048     rvectorgrowto(&a->slsval, a->slsused+nz0*nz1, _state);
42049     for(j=0; j<=nz1-1; j++)
42050     {
42051         if( nz0==0 )
42052         {
42053             continue;
42054         }
42055         i1 = v1i->ptr.p_int[j];
42056         v1 = v1r->ptr.p_double[j];
42057 
42058         /*
42059          * Update column #I1
42060          */
42061         nnz = a->nzc.ptr.p_int[i1];
42062         i = 0;
42063         i0 = v0i->ptr.p_int[i];
42064         entry = a->slscolptr.ptr.p_int[i1];
42065         pprev = -1;
42066         while(i<nz0)
42067         {
42068 
42069             /*
42070              * Handle possible fill-in happening BEFORE already existing
42071              * entry of the column list (or simply fill-in, if no entry
42072              * is present).
42073              */
42074             pnext = entry;
42075             if( entry>=0 )
42076             {
42077                 nexti = a->slsidx.ptr.p_int[entry*sptrf_slswidth+4];
42078             }
42079             else
42080             {
42081                 nexti = n+1;
42082             }
42083             while(i<nz0)
42084             {
42085                 if( i0>=nexti )
42086                 {
42087                     break;
42088                 }
42089 
42090                 /*
42091                  * Allocate new entry, store column/row/value
42092                  */
42093                 newentry = a->slsused;
42094                 a->slsused = newentry+1;
42095                 nnz = nnz+1;
42096                 newoffs = newentry*sptrf_slswidth;
42097                 a->slsidx.ptr.p_int[newoffs+4] = i0;
42098                 a->slsidx.ptr.p_int[newoffs+5] = i1;
42099                 a->slsval.ptr.p_double[newentry] = -v1*v0r->ptr.p_double[i];
42100 
42101                 /*
42102                  * Insert entry into column list
42103                  */
42104                 a->slsidx.ptr.p_int[newoffs+0] = pprev;
42105                 a->slsidx.ptr.p_int[newoffs+1] = pnext;
42106                 if( pprev>=0 )
42107                 {
42108                     a->slsidx.ptr.p_int[pprev*sptrf_slswidth+1] = newentry;
42109                 }
42110                 else
42111                 {
42112                     a->slscolptr.ptr.p_int[i1] = newentry;
42113                 }
42114                 if( entry>=0 )
42115                 {
42116                     a->slsidx.ptr.p_int[entry*sptrf_slswidth+0] = newentry;
42117                 }
42118 
42119                 /*
42120                  * Insert entry into row list
42121                  */
42122                 p = a->slsrowptr.ptr.p_int[i0];
42123                 a->slsidx.ptr.p_int[newoffs+2] = -1;
42124                 a->slsidx.ptr.p_int[newoffs+3] = p;
42125                 if( p>=0 )
42126                 {
42127                     a->slsidx.ptr.p_int[p*sptrf_slswidth+2] = newentry;
42128                 }
42129                 a->slsrowptr.ptr.p_int[i0] = newentry;
42130 
42131                 /*
42132                  * Advance pointers
42133                  */
42134                 pprev = newentry;
42135                 i = i+1;
42136                 i0 = v0i->ptr.p_int[i];
42137             }
42138             if( i>=nz0 )
42139             {
42140                 break;
42141             }
42142 
42143             /*
42144              * Update already existing entry of the column list, if needed
42145              */
42146             if( entry>=0 )
42147             {
42148                 if( i0==nexti )
42149                 {
42150                     a->slsval.ptr.p_double[entry] = a->slsval.ptr.p_double[entry]-v1*v0r->ptr.p_double[i];
42151                     i = i+1;
42152                     i0 = v0i->ptr.p_int[i];
42153                 }
42154                 pprev = entry;
42155             }
42156 
42157             /*
42158              * Advance to the next pre-existing entry (if present)
42159              */
42160             if( entry>=0 )
42161             {
42162                 entry = a->slsidx.ptr.p_int[entry*sptrf_slswidth+1];
42163             }
42164         }
42165         a->nzc.ptr.p_int[i1] = nnz;
42166 
42167         /*
42168          * Densify column if needed
42169          */
42170         if( (densificationsupported&&nnz>densifyabove)&&!a->isdensified.ptr.p_bool[i1] )
42171         {
42172             sptrf_sparsetraildensify(a, i1, bupper, dtrail, _state);
42173         }
42174     }
42175 }
42176 
42177 
_sluv2list1matrix_init(void * _p,ae_state * _state,ae_bool make_automatic)42178 void _sluv2list1matrix_init(void* _p, ae_state *_state, ae_bool make_automatic)
42179 {
42180     sluv2list1matrix *p = (sluv2list1matrix*)_p;
42181     ae_touch_ptr((void*)p);
42182     ae_vector_init(&p->idxfirst, 0, DT_INT, _state, make_automatic);
42183     ae_vector_init(&p->strgidx, 0, DT_INT, _state, make_automatic);
42184     ae_vector_init(&p->strgval, 0, DT_REAL, _state, make_automatic);
42185 }
42186 
42187 
_sluv2list1matrix_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42188 void _sluv2list1matrix_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42189 {
42190     sluv2list1matrix *dst = (sluv2list1matrix*)_dst;
42191     sluv2list1matrix *src = (sluv2list1matrix*)_src;
42192     dst->nfixed = src->nfixed;
42193     dst->ndynamic = src->ndynamic;
42194     ae_vector_init_copy(&dst->idxfirst, &src->idxfirst, _state, make_automatic);
42195     ae_vector_init_copy(&dst->strgidx, &src->strgidx, _state, make_automatic);
42196     ae_vector_init_copy(&dst->strgval, &src->strgval, _state, make_automatic);
42197     dst->nallocated = src->nallocated;
42198     dst->nused = src->nused;
42199 }
42200 
42201 
_sluv2list1matrix_clear(void * _p)42202 void _sluv2list1matrix_clear(void* _p)
42203 {
42204     sluv2list1matrix *p = (sluv2list1matrix*)_p;
42205     ae_touch_ptr((void*)p);
42206     ae_vector_clear(&p->idxfirst);
42207     ae_vector_clear(&p->strgidx);
42208     ae_vector_clear(&p->strgval);
42209 }
42210 
42211 
_sluv2list1matrix_destroy(void * _p)42212 void _sluv2list1matrix_destroy(void* _p)
42213 {
42214     sluv2list1matrix *p = (sluv2list1matrix*)_p;
42215     ae_touch_ptr((void*)p);
42216     ae_vector_destroy(&p->idxfirst);
42217     ae_vector_destroy(&p->strgidx);
42218     ae_vector_destroy(&p->strgval);
42219 }
42220 
42221 
_sluv2sparsetrail_init(void * _p,ae_state * _state,ae_bool make_automatic)42222 void _sluv2sparsetrail_init(void* _p, ae_state *_state, ae_bool make_automatic)
42223 {
42224     sluv2sparsetrail *p = (sluv2sparsetrail*)_p;
42225     ae_touch_ptr((void*)p);
42226     ae_vector_init(&p->nzc, 0, DT_INT, _state, make_automatic);
42227     ae_vector_init(&p->wrkset, 0, DT_INT, _state, make_automatic);
42228     ae_vector_init(&p->colid, 0, DT_INT, _state, make_automatic);
42229     ae_vector_init(&p->isdensified, 0, DT_BOOL, _state, make_automatic);
42230     ae_vector_init(&p->slscolptr, 0, DT_INT, _state, make_automatic);
42231     ae_vector_init(&p->slsrowptr, 0, DT_INT, _state, make_automatic);
42232     ae_vector_init(&p->slsidx, 0, DT_INT, _state, make_automatic);
42233     ae_vector_init(&p->slsval, 0, DT_REAL, _state, make_automatic);
42234     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
42235 }
42236 
42237 
_sluv2sparsetrail_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42238 void _sluv2sparsetrail_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42239 {
42240     sluv2sparsetrail *dst = (sluv2sparsetrail*)_dst;
42241     sluv2sparsetrail *src = (sluv2sparsetrail*)_src;
42242     dst->n = src->n;
42243     dst->k = src->k;
42244     ae_vector_init_copy(&dst->nzc, &src->nzc, _state, make_automatic);
42245     dst->maxwrkcnt = src->maxwrkcnt;
42246     dst->maxwrknz = src->maxwrknz;
42247     dst->wrkcnt = src->wrkcnt;
42248     ae_vector_init_copy(&dst->wrkset, &src->wrkset, _state, make_automatic);
42249     ae_vector_init_copy(&dst->colid, &src->colid, _state, make_automatic);
42250     ae_vector_init_copy(&dst->isdensified, &src->isdensified, _state, make_automatic);
42251     ae_vector_init_copy(&dst->slscolptr, &src->slscolptr, _state, make_automatic);
42252     ae_vector_init_copy(&dst->slsrowptr, &src->slsrowptr, _state, make_automatic);
42253     ae_vector_init_copy(&dst->slsidx, &src->slsidx, _state, make_automatic);
42254     ae_vector_init_copy(&dst->slsval, &src->slsval, _state, make_automatic);
42255     dst->slsused = src->slsused;
42256     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
42257 }
42258 
42259 
_sluv2sparsetrail_clear(void * _p)42260 void _sluv2sparsetrail_clear(void* _p)
42261 {
42262     sluv2sparsetrail *p = (sluv2sparsetrail*)_p;
42263     ae_touch_ptr((void*)p);
42264     ae_vector_clear(&p->nzc);
42265     ae_vector_clear(&p->wrkset);
42266     ae_vector_clear(&p->colid);
42267     ae_vector_clear(&p->isdensified);
42268     ae_vector_clear(&p->slscolptr);
42269     ae_vector_clear(&p->slsrowptr);
42270     ae_vector_clear(&p->slsidx);
42271     ae_vector_clear(&p->slsval);
42272     ae_vector_clear(&p->tmp0);
42273 }
42274 
42275 
_sluv2sparsetrail_destroy(void * _p)42276 void _sluv2sparsetrail_destroy(void* _p)
42277 {
42278     sluv2sparsetrail *p = (sluv2sparsetrail*)_p;
42279     ae_touch_ptr((void*)p);
42280     ae_vector_destroy(&p->nzc);
42281     ae_vector_destroy(&p->wrkset);
42282     ae_vector_destroy(&p->colid);
42283     ae_vector_destroy(&p->isdensified);
42284     ae_vector_destroy(&p->slscolptr);
42285     ae_vector_destroy(&p->slsrowptr);
42286     ae_vector_destroy(&p->slsidx);
42287     ae_vector_destroy(&p->slsval);
42288     ae_vector_destroy(&p->tmp0);
42289 }
42290 
42291 
_sluv2densetrail_init(void * _p,ae_state * _state,ae_bool make_automatic)42292 void _sluv2densetrail_init(void* _p, ae_state *_state, ae_bool make_automatic)
42293 {
42294     sluv2densetrail *p = (sluv2densetrail*)_p;
42295     ae_touch_ptr((void*)p);
42296     ae_matrix_init(&p->d, 0, 0, DT_REAL, _state, make_automatic);
42297     ae_vector_init(&p->did, 0, DT_INT, _state, make_automatic);
42298 }
42299 
42300 
_sluv2densetrail_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42301 void _sluv2densetrail_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42302 {
42303     sluv2densetrail *dst = (sluv2densetrail*)_dst;
42304     sluv2densetrail *src = (sluv2densetrail*)_src;
42305     dst->n = src->n;
42306     dst->ndense = src->ndense;
42307     ae_matrix_init_copy(&dst->d, &src->d, _state, make_automatic);
42308     ae_vector_init_copy(&dst->did, &src->did, _state, make_automatic);
42309 }
42310 
42311 
_sluv2densetrail_clear(void * _p)42312 void _sluv2densetrail_clear(void* _p)
42313 {
42314     sluv2densetrail *p = (sluv2densetrail*)_p;
42315     ae_touch_ptr((void*)p);
42316     ae_matrix_clear(&p->d);
42317     ae_vector_clear(&p->did);
42318 }
42319 
42320 
_sluv2densetrail_destroy(void * _p)42321 void _sluv2densetrail_destroy(void* _p)
42322 {
42323     sluv2densetrail *p = (sluv2densetrail*)_p;
42324     ae_touch_ptr((void*)p);
42325     ae_matrix_destroy(&p->d);
42326     ae_vector_destroy(&p->did);
42327 }
42328 
42329 
_sluv2buffer_init(void * _p,ae_state * _state,ae_bool make_automatic)42330 void _sluv2buffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
42331 {
42332     sluv2buffer *p = (sluv2buffer*)_p;
42333     ae_touch_ptr((void*)p);
42334     _sparsematrix_init(&p->sparsel, _state, make_automatic);
42335     _sparsematrix_init(&p->sparseut, _state, make_automatic);
42336     _sluv2list1matrix_init(&p->bleft, _state, make_automatic);
42337     _sluv2list1matrix_init(&p->bupper, _state, make_automatic);
42338     _sluv2sparsetrail_init(&p->strail, _state, make_automatic);
42339     _sluv2densetrail_init(&p->dtrail, _state, make_automatic);
42340     ae_vector_init(&p->rowpermrawidx, 0, DT_INT, _state, make_automatic);
42341     ae_matrix_init(&p->dbuf, 0, 0, DT_REAL, _state, make_automatic);
42342     ae_vector_init(&p->v0i, 0, DT_INT, _state, make_automatic);
42343     ae_vector_init(&p->v1i, 0, DT_INT, _state, make_automatic);
42344     ae_vector_init(&p->v0r, 0, DT_REAL, _state, make_automatic);
42345     ae_vector_init(&p->v1r, 0, DT_REAL, _state, make_automatic);
42346     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
42347     ae_vector_init(&p->tmpi, 0, DT_INT, _state, make_automatic);
42348     ae_vector_init(&p->tmpp, 0, DT_INT, _state, make_automatic);
42349 }
42350 
42351 
_sluv2buffer_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)42352 void _sluv2buffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
42353 {
42354     sluv2buffer *dst = (sluv2buffer*)_dst;
42355     sluv2buffer *src = (sluv2buffer*)_src;
42356     dst->n = src->n;
42357     _sparsematrix_init_copy(&dst->sparsel, &src->sparsel, _state, make_automatic);
42358     _sparsematrix_init_copy(&dst->sparseut, &src->sparseut, _state, make_automatic);
42359     _sluv2list1matrix_init_copy(&dst->bleft, &src->bleft, _state, make_automatic);
42360     _sluv2list1matrix_init_copy(&dst->bupper, &src->bupper, _state, make_automatic);
42361     _sluv2sparsetrail_init_copy(&dst->strail, &src->strail, _state, make_automatic);
42362     _sluv2densetrail_init_copy(&dst->dtrail, &src->dtrail, _state, make_automatic);
42363     ae_vector_init_copy(&dst->rowpermrawidx, &src->rowpermrawidx, _state, make_automatic);
42364     ae_matrix_init_copy(&dst->dbuf, &src->dbuf, _state, make_automatic);
42365     ae_vector_init_copy(&dst->v0i, &src->v0i, _state, make_automatic);
42366     ae_vector_init_copy(&dst->v1i, &src->v1i, _state, make_automatic);
42367     ae_vector_init_copy(&dst->v0r, &src->v0r, _state, make_automatic);
42368     ae_vector_init_copy(&dst->v1r, &src->v1r, _state, make_automatic);
42369     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
42370     ae_vector_init_copy(&dst->tmpi, &src->tmpi, _state, make_automatic);
42371     ae_vector_init_copy(&dst->tmpp, &src->tmpp, _state, make_automatic);
42372 }
42373 
42374 
_sluv2buffer_clear(void * _p)42375 void _sluv2buffer_clear(void* _p)
42376 {
42377     sluv2buffer *p = (sluv2buffer*)_p;
42378     ae_touch_ptr((void*)p);
42379     _sparsematrix_clear(&p->sparsel);
42380     _sparsematrix_clear(&p->sparseut);
42381     _sluv2list1matrix_clear(&p->bleft);
42382     _sluv2list1matrix_clear(&p->bupper);
42383     _sluv2sparsetrail_clear(&p->strail);
42384     _sluv2densetrail_clear(&p->dtrail);
42385     ae_vector_clear(&p->rowpermrawidx);
42386     ae_matrix_clear(&p->dbuf);
42387     ae_vector_clear(&p->v0i);
42388     ae_vector_clear(&p->v1i);
42389     ae_vector_clear(&p->v0r);
42390     ae_vector_clear(&p->v1r);
42391     ae_vector_clear(&p->tmp0);
42392     ae_vector_clear(&p->tmpi);
42393     ae_vector_clear(&p->tmpp);
42394 }
42395 
42396 
_sluv2buffer_destroy(void * _p)42397 void _sluv2buffer_destroy(void* _p)
42398 {
42399     sluv2buffer *p = (sluv2buffer*)_p;
42400     ae_touch_ptr((void*)p);
42401     _sparsematrix_destroy(&p->sparsel);
42402     _sparsematrix_destroy(&p->sparseut);
42403     _sluv2list1matrix_destroy(&p->bleft);
42404     _sluv2list1matrix_destroy(&p->bupper);
42405     _sluv2sparsetrail_destroy(&p->strail);
42406     _sluv2densetrail_destroy(&p->dtrail);
42407     ae_vector_destroy(&p->rowpermrawidx);
42408     ae_matrix_destroy(&p->dbuf);
42409     ae_vector_destroy(&p->v0i);
42410     ae_vector_destroy(&p->v1i);
42411     ae_vector_destroy(&p->v0r);
42412     ae_vector_destroy(&p->v1r);
42413     ae_vector_destroy(&p->tmp0);
42414     ae_vector_destroy(&p->tmpi);
42415     ae_vector_destroy(&p->tmpp);
42416 }
42417 
42418 
42419 #endif
42420 #if defined(AE_COMPILE_AMDORDERING) || !defined(AE_PARTIAL_BUILD)
42421 
42422 
42423 /*************************************************************************
42424 This function generates approximate minimum degree ordering
42425 
42426 INPUT PARAMETERS
42427     A           -   lower triangular sparse matrix  in  CRS  format.  Only
42428                     sparsity structure (as given by Idx[] field)  matters,
42429                     specific values of matrix elements are ignored.
42430     N           -   problem size
42431     Buf         -   reusable buffer object, does not need special initialization
42432 
42433 OUTPUT PARAMETERS
42434     Perm        -   array[N], maps original indexes I to permuted indexes
42435     InvPerm     -   array[N], maps permuted indexes I to original indexes
42436 
NOTE: defining 'DEBUG.SLOW' trace tag will  activate  extra-slow  (roughly
      N^3 ops) integrity checks, in addition to cheap O(1) ones.
42439 
42440   -- ALGLIB PROJECT --
42441      Copyright 05.10.2020 by Bochkanov Sergey.
42442 *************************************************************************/
generateamdpermutation(sparsematrix * a,ae_int_t n,ae_vector * perm,ae_vector * invperm,amdbuffer * buf,ae_state * _state)42443 void generateamdpermutation(sparsematrix* a,
42444      ae_int_t n,
42445      /* Integer */ ae_vector* perm,
42446      /* Integer */ ae_vector* invperm,
42447      amdbuffer* buf,
42448      ae_state *_state)
42449 {
42450     ae_int_t r;
42451 
42452 
42453     r = generateamdpermutationx(a, n, perm, invperm, 0, buf, _state);
42454     ae_assert(r==n, "GenerateAMDPermutation: integrity check failed, the matrix is only partially processed", _state);
42455 }
42456 
42457 
42458 /*************************************************************************
42459 This  function  generates  approximate  minimum  degree ordering,   either
42460 classic or improved with better support for dense rows:
42461 * the classic version processed entire matrix and returns N as result. The
42462   problem with classic version is that it may be slow  for  matrices  with
42463   dense or nearly dense rows
42464 * the improved version processes K most sparse rows, and moves  other  N-K
42465   ones to the end. The number of sparse rows  K  is  returned.  The  tail,
42466   which is now a (N-K)*(N-K) matrix, should be repeatedly processed by the
42467   same function until zero is returned.
42468 
42469 INPUT PARAMETERS
42470     A           -   lower triangular sparse matrix in CRS format
42471     N           -   problem size
42472     AMDType     -   ordering type:
42473                     * 0 for the classic AMD
42474                     * 1 for the improved AMD
42475     Buf         -   reusable buffer object, does not need special initialization
42476 
42477 OUTPUT PARAMETERS
42478     Perm        -   array[N], maps original indexes I to permuted indexes
42479     InvPerm     -   array[N], maps permuted indexes I to original indexes
42480 
42481 RESULT:
42482     number of successfully ordered rows/cols;
42483     N for AMDType=0, 0<Result<=N for AMDType=1
42484 
42485 NOTE: defining 'DEBUG.SLOW' trace tag will  activate  extra-slow  (roughly
42486       N^3 ops) integrity checks, in addition to cheap O(1) ones.
42487 
42488   -- ALGLIB PROJECT --
42489      Copyright 05.10.2020 by Bochkanov Sergey.
42490 *************************************************************************/
ae_int_t generateamdpermutationx(sparsematrix* a,
     ae_int_t n,
     /* Integer */ ae_vector* perm,
     /* Integer */ ae_vector* invperm,
     ae_int_t amdtype,
     amdbuffer* buf,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t p;
    ae_int_t setprealloc;
    ae_int_t inithashbucketsize;
    ae_bool extendeddebug;
    ae_int_t nodesize;
    ae_int_t cnt0;
    ae_int_t cnt1;
    ae_int_t tau;
    double meand;
    ae_int_t d;
    ae_int_t result;


    ae_assert(amdtype==0||amdtype==1, "GenerateAMDPermutationX: unexpected ordering type", _state);

    /*
     * Initialization:
     * * extended (extra-slow) debug checks are activated only when the
     *   'DEBUG.SLOW' trace tag is defined AND the problem is small (N<=100)
     * * Buf fields are (re)initialized: adjacency structure SetA built from A,
     *   supernode sets (initially each variable is its own supernode),
     *   element sets SetE, hash buckets, identity permutations, degree tracker
     */
    setprealloc = 3;
    inithashbucketsize = 16;
    extendeddebug = ae_is_trace_enabled("DEBUG.SLOW")&&n<=100;
    result = n;
    buf->n = n;
    buf->checkexactdegrees = extendeddebug;
    buf->extendeddebug = extendeddebug;
    amdordering_mtxinit(n, &buf->mtxl, _state);
    amdordering_knsinitfroma(a, n, &buf->seta, _state);
    amdordering_knsinit(n, n, setprealloc, &buf->setsuper, _state);
    for(i=0; i<=n-1; i++)
    {
        amdordering_knsaddnewelement(&buf->setsuper, i, i, _state);
    }
    amdordering_knsinit(n, n, setprealloc, &buf->sete, _state);
    amdordering_knsinit(n, n, inithashbucketsize, &buf->hashbuckets, _state);
    amdordering_nsinitemptyslow(n, &buf->nonemptybuckets, _state);
    ivectorsetlengthatleast(&buf->perm, n, _state);
    ivectorsetlengthatleast(&buf->invperm, n, _state);
    ivectorsetlengthatleast(&buf->columnswaps, n, _state);
    for(i=0; i<=n-1; i++)
    {
        buf->perm.ptr.p_int[i] = i;
        buf->invperm.ptr.p_int[i] = i;
        buf->columnswaps.ptr.p_int[i] = i;
    }
    amdordering_vtxinit(a, n, buf->checkexactdegrees, &buf->vertexdegrees, _state);
    bsetallocv(n, ae_true, &buf->issupernode, _state);
    bsetallocv(n, ae_false, &buf->iseliminated, _state);
    isetallocv(n, -1, &buf->arrwe, _state);
    if( extendeddebug )
    {
        /*
         * Build a synthetic SPD-looking dense matrix DbgA with the same sparsity
         * pattern as A (small pseudo-random off-diagonal entries, unit diagonal);
         * it is used below to cross-check fill-in predicted by AMD against an
         * actual dense elimination.
         */
        ae_matrix_set_length(&buf->dbga, n, n, _state);
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                if( (j<=i&&sparseexists(a, i, j, _state))||(j>=i&&sparseexists(a, j, i, _state)) )
                {
                    buf->dbga.ptr.pp_double[i][j] = 0.1/n*(ae_sin(i+0.17, _state)+ae_cos(ae_sqrt(j+0.65, _state), _state));
                }
                else
                {
                    buf->dbga.ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        for(i=0; i<=n-1; i++)
        {
            buf->dbga.ptr.pp_double[i][i] = (double)(1);
        }
    }

    /*
     * For the improved AMD (AMDType=1) compute dense-row threshold Tau from the
     * mean approximate degree; rows denser than this are postponed (moved to the
     * tail set SetQ). Tau=0 for the classic AMD, which postpones nothing.
     */
    tau = 0;
    if( amdtype==1 )
    {
        meand = 0.0;
        for(i=0; i<=n-1; i++)
        {
            d = amdordering_vtxgetapprox(&buf->vertexdegrees, i, _state);
            meand = meand+d;
        }
        meand = meand/n;
        tau = ae_round(10*meand, _state)+2;
    }
    ivectorsetlengthatleast(&buf->ls, n, _state);
    amdordering_nsinitemptyslow(n, &buf->setp, _state);
    amdordering_nsinitemptyslow(n, &buf->lp, _state);
    amdordering_nsinitemptyslow(n, &buf->setrp, _state);
    amdordering_nsinitemptyslow(n, &buf->ep, _state);
    amdordering_nsinitemptyslow(n, &buf->exactdegreetmp0, _state);
    amdordering_nsinitemptyslow(n, &buf->adji, _state);
    amdordering_nsinitemptyslow(n, &buf->adjj, _state);
    amdordering_nsinitemptyslow(n, &buf->setq, _state);
    amdordering_nsinitemptyslow(n, &buf->setqsupercand, _state);

    /*
     * Main elimination loop: K counts already-ordered columns; SetQ holds
     * postponed (quasi-dense) variables, so the loop stops as soon as only
     * postponed variables remain.
     */
    k = 0;
    while(k<n-amdordering_nscount(&buf->setq, _state))
    {
        /*
         * Select pivot supernode P (of size NodeSize), compute its elimination
         * set Lp, perform mass elimination. Candidates for postponing (degree
         * above Tau) are collected in SetQSuperCand by the mass elimination.
         */
        amdordering_amdselectpivotelement(buf, k, &p, &nodesize, _state);
        amdordering_amdcomputelp(buf, p, _state);
        amdordering_amdmasselimination(buf, p, k, tau, _state);

        /*
         * Move postponed supernode candidates to SetQ: their member variables
         * are added to SetQ, and the supernodes themselves are retired from the
         * active structures (SetA, SetE, degree tracker).
         */
        amdordering_nsstartenumeration(&buf->setqsupercand, _state);
        while(amdordering_nsenumerate(&buf->setqsupercand, &j, _state))
        {
            ae_assert(j!=p, "AMD: integrity check 9464 failed", _state);
            ae_assert(buf->issupernode.ptr.p_bool[j], "AMD: integrity check 6284 failed", _state);
            ae_assert(!buf->iseliminated.ptr.p_bool[j], "AMD: integrity check 3858 failed", _state);
            amdordering_knsstartenumeration(&buf->setsuper, j, _state);
            while(amdordering_knsenumerate(&buf->setsuper, &i, _state))
            {
                amdordering_nsaddelement(&buf->setq, i, _state);
            }
            amdordering_knsclearkthreclaim(&buf->seta, j, _state);
            amdordering_knsclearkthreclaim(&buf->sete, j, _state);
            buf->issupernode.ptr.p_bool[j] = ae_false;
            amdordering_vtxremovevertex(&buf->vertexdegrees, j, _state);
        }
        amdordering_amddetectsupernodes(buf, _state);
        if( extendeddebug )
        {
            /*
             * Extra-slow integrity checks: replay the column swaps and dense
             * elimination steps on DbgA, then compare actual fill-in (nonzero
             * count in the pivot column) and exact degrees against the values
             * tracked by the AMD structures.
             */
            ae_assert(buf->checkexactdegrees, "AMD: extended debug needs exact degrees", _state);
            for(i=k; i<=k+nodesize-1; i++)
            {
                if( buf->columnswaps.ptr.p_int[i]!=i )
                {
                    swaprows(&buf->dbga, i, buf->columnswaps.ptr.p_int[i], n, _state);
                    swapcols(&buf->dbga, i, buf->columnswaps.ptr.p_int[i], n, _state);
                }
            }
            for(i=0; i<=nodesize-1; i++)
            {
                rmatrixgemm(n-k-i, n-k-i, k+i, -1.0, &buf->dbga, k+i, 0, 0, &buf->dbga, 0, k+i, 0, 1.0, &buf->dbga, k+i, k+i, _state);
            }
            cnt0 = amdordering_nscount(&buf->lp, _state);
            cnt1 = 0;
            for(i=k+1; i<=n-1; i++)
            {
                if( ae_fp_neq(buf->dbga.ptr.pp_double[i][k],(double)(0)) )
                {
                    inc(&cnt1, _state);
                }
            }
            ae_assert(cnt0+nodesize-1==cnt1, "AMD: integrity check 7344 failed", _state);
            ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, p, _state)>=amdordering_vtxgetexact(&buf->vertexdegrees, p, _state), "AMD: integrity check for ApproxD failed", _state);
            ae_assert(amdordering_vtxgetexact(&buf->vertexdegrees, p, _state)==cnt0, "AMD: integrity check for ExactD failed", _state);
        }
        ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, p, _state)>=amdordering_nscount(&buf->lp, _state), "AMD: integrity check 7956 failed", _state);
        ae_assert((amdordering_knscountkth(&buf->sete, p, _state)>2||amdordering_nscount(&buf->setq, _state)>0)||amdordering_vtxgetapprox(&buf->vertexdegrees, p, _state)==amdordering_nscount(&buf->lp, _state), "AMD: integrity check 7295 failed", _state);

        /*
         * Retire pivot P: clear columns of elements absorbed by P, mark all
         * variables of the pivot supernode as eliminated, clear their rows,
         * and remove P from the active sets and the degree tracker.
         */
        amdordering_knsstartenumeration(&buf->sete, p, _state);
        while(amdordering_knsenumerate(&buf->sete, &j, _state))
        {
            amdordering_mtxclearcolumn(&buf->mtxl, j, _state);
        }
        amdordering_knsstartenumeration(&buf->setsuper, p, _state);
        while(amdordering_knsenumerate(&buf->setsuper, &j, _state))
        {
            buf->iseliminated.ptr.p_bool[j] = ae_true;
            amdordering_mtxclearrow(&buf->mtxl, j, _state);
        }
        amdordering_knsclearkthreclaim(&buf->seta, p, _state);
        amdordering_knsclearkthreclaim(&buf->sete, p, _state);
        buf->issupernode.ptr.p_bool[p] = ae_false;
        amdordering_vtxremovevertex(&buf->vertexdegrees, p, _state);
        k = k+nodesize;
    }

    /*
     * Postprocessing: ordered count K plus postponed count must equal N;
     * copy the internally maintained permutations to the output arrays.
     * NOTE(review): Perm/InvPerm are copied in full (all N entries) even when
     * only K<N rows were ordered — presumably the tail entries are still a
    *  valid identity-like mapping maintained by the pivot-selection code.
     */
    ae_assert(k+amdordering_nscount(&buf->setq, _state)==n, "AMD: integrity check 6326 failed", _state);
    ae_assert(k>0, "AMD: integrity check 9463 failed", _state);
    result = k;
    ivectorsetlengthatleast(perm, n, _state);
    ivectorsetlengthatleast(invperm, n, _state);
    for(i=0; i<=n-1; i++)
    {
        perm->ptr.p_int[i] = buf->perm.ptr.p_int[i];
        invperm->ptr.p_int[i] = buf->invperm.ptr.p_int[i];
    }
    return result;
}
42672 
42673 
42674 /*************************************************************************
42675 Initializes n-set by empty structure.
42676 
42677 IMPORTANT: this function need O(N) time for initialization. It is recommended
42678            to reduce its usage as much as possible, and use nsClear()
42679            where possible.
42680 
42681 INPUT PARAMETERS
42682     N           -   possible set size
42683 
42684 OUTPUT PARAMETERS
42685     SA          -   empty N-set
42686 
42687   -- ALGLIB PROJECT --
42688      Copyright 05.10.2020 by Bochkanov Sergey.
42689 *************************************************************************/
amdordering_nsinitemptyslow(ae_int_t n,amdnset * sa,ae_state * _state)42690 static void amdordering_nsinitemptyslow(ae_int_t n,
42691      amdnset* sa,
42692      ae_state *_state)
42693 {
42694 
42695 
42696     sa->n = n;
42697     sa->nstored = 0;
42698     isetallocv(n, -999999999, &sa->locationof, _state);
42699     isetallocv(n, -999999999, &sa->items, _state);
42700 }
42701 
42702 
42703 /*************************************************************************
42704 Copies n-set to properly initialized target set. The target set has to  be
42705 properly initialized, and it can be non-empty. If  it  is  non-empty,  its
42706 contents is quickly erased before copying.
42707 
42708 The cost of this function is O(max(SrcSize,DstSize))
42709 
42710 INPUT PARAMETERS
42711     SSrc        -   source N-set
42712     SDst        -   destination N-set (has same size as SSrc)
42713 
42714 OUTPUT PARAMETERS
42715     SDst        -   copy of SSrc
42716 
42717   -- ALGLIB PROJECT --
42718      Copyright 05.10.2020 by Bochkanov Sergey.
42719 *************************************************************************/
amdordering_nscopy(amdnset * ssrc,amdnset * sdst,ae_state * _state)42720 static void amdordering_nscopy(amdnset* ssrc,
42721      amdnset* sdst,
42722      ae_state *_state)
42723 {
42724     ae_int_t ns;
42725     ae_int_t i;
42726     ae_int_t k;
42727 
42728 
42729     amdordering_nsclear(sdst, _state);
42730     ns = ssrc->nstored;
42731     for(i=0; i<=ns-1; i++)
42732     {
42733         k = ssrc->items.ptr.p_int[i];
42734         sdst->items.ptr.p_int[i] = k;
42735         sdst->locationof.ptr.p_int[k] = i;
42736     }
42737     sdst->nstored = ns;
42738 }
42739 
42740 
42741 /*************************************************************************
42742 Add K-th element to the set. The element may already exist in the set.
42743 
42744 INPUT PARAMETERS
42745     SA          -   set
42746     K           -   element to add, 0<=K<N.
42747 
42748 OUTPUT PARAMETERS
42749     SA          -   modified SA
42750 
42751   -- ALGLIB PROJECT --
42752      Copyright 05.10.2020 by Bochkanov Sergey.
42753 *************************************************************************/
amdordering_nsaddelement(amdnset * sa,ae_int_t k,ae_state * _state)42754 static void amdordering_nsaddelement(amdnset* sa,
42755      ae_int_t k,
42756      ae_state *_state)
42757 {
42758     ae_int_t ns;
42759 
42760 
42761     if( sa->locationof.ptr.p_int[k]>=0 )
42762     {
42763         return;
42764     }
42765     ns = sa->nstored;
42766     sa->locationof.ptr.p_int[k] = ns;
42767     sa->items.ptr.p_int[ns] = k;
42768     sa->nstored = ns+1;
42769 }
42770 
42771 
42772 /*************************************************************************
42773 Add K-th set from the source kn-set
42774 
42775 INPUT PARAMETERS
42776     SA          -   set
42777     Src, K      -   source kn-set and set index K
42778 
42779 OUTPUT PARAMETERS
42780     SA          -   modified SA
42781 
42782   -- ALGLIB PROJECT --
42783      Copyright 05.10.2020 by Bochkanov Sergey.
42784 *************************************************************************/
amdordering_nsaddkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)42785 static void amdordering_nsaddkth(amdnset* sa,
42786      amdknset* src,
42787      ae_int_t k,
42788      ae_state *_state)
42789 {
42790     ae_int_t idxbegin;
42791     ae_int_t idxend;
42792     ae_int_t j;
42793     ae_int_t ns;
42794 
42795 
42796     idxbegin = src->vbegin.ptr.p_int[k];
42797     idxend = idxbegin+src->vcnt.ptr.p_int[k];
42798     ns = sa->nstored;
42799     while(idxbegin<idxend)
42800     {
42801         j = src->data.ptr.p_int[idxbegin];
42802         if( sa->locationof.ptr.p_int[j]<0 )
42803         {
42804             sa->locationof.ptr.p_int[j] = ns;
42805             sa->items.ptr.p_int[ns] = j;
42806             ns = ns+1;
42807         }
42808         idxbegin = idxbegin+1;
42809     }
42810     sa->nstored = ns;
42811 }
42812 
42813 
42814 /*************************************************************************
Subtracts one n-set (Src) from another one (SA)

INPUT PARAMETERS
    SA          -   set
    Src         -   n-set whose elements are removed from SA
42820 
42821 OUTPUT PARAMETERS
42822     SA          -   modified SA
42823 
42824   -- ALGLIB PROJECT --
42825      Copyright 05.10.2020 by Bochkanov Sergey.
42826 *************************************************************************/
amdordering_nssubtract1(amdnset * sa,amdnset * src,ae_state * _state)42827 static void amdordering_nssubtract1(amdnset* sa,
42828      amdnset* src,
42829      ae_state *_state)
42830 {
42831     ae_int_t i;
42832     ae_int_t j;
42833     ae_int_t loc;
42834     ae_int_t item;
42835     ae_int_t ns;
42836     ae_int_t ss;
42837 
42838 
42839     ns = sa->nstored;
42840     ss = src->nstored;
42841     if( ss<ns )
42842     {
42843         for(i=0; i<=ss-1; i++)
42844         {
42845             j = src->items.ptr.p_int[i];
42846             loc = sa->locationof.ptr.p_int[j];
42847             if( loc>=0 )
42848             {
42849                 item = sa->items.ptr.p_int[ns-1];
42850                 sa->items.ptr.p_int[loc] = item;
42851                 sa->locationof.ptr.p_int[item] = loc;
42852                 sa->locationof.ptr.p_int[j] = -1;
42853                 ns = ns-1;
42854             }
42855         }
42856     }
42857     else
42858     {
42859         i = 0;
42860         while(i<ns)
42861         {
42862             j = sa->items.ptr.p_int[i];
42863             loc = src->locationof.ptr.p_int[j];
42864             if( loc>=0 )
42865             {
42866                 item = sa->items.ptr.p_int[ns-1];
42867                 sa->items.ptr.p_int[i] = item;
42868                 sa->locationof.ptr.p_int[item] = i;
42869                 sa->locationof.ptr.p_int[j] = -1;
42870                 ns = ns-1;
42871             }
42872             else
42873             {
42874                 i = i+1;
42875             }
42876         }
42877     }
42878     sa->nstored = ns;
42879 }
42880 
42881 
42882 /*************************************************************************
42883 Subtracts K-th set from the source structure
42884 
42885 INPUT PARAMETERS
42886     SA          -   set
42887     Src, K      -   source kn-set and set index K
42888 
42889 OUTPUT PARAMETERS
42890     SA          -   modified SA
42891 
42892   -- ALGLIB PROJECT --
42893      Copyright 05.10.2020 by Bochkanov Sergey.
42894 *************************************************************************/
amdordering_nssubtractkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)42895 static void amdordering_nssubtractkth(amdnset* sa,
42896      amdknset* src,
42897      ae_int_t k,
42898      ae_state *_state)
42899 {
42900     ae_int_t idxbegin;
42901     ae_int_t idxend;
42902     ae_int_t j;
42903     ae_int_t loc;
42904     ae_int_t ns;
42905     ae_int_t item;
42906 
42907 
42908     idxbegin = src->vbegin.ptr.p_int[k];
42909     idxend = idxbegin+src->vcnt.ptr.p_int[k];
42910     ns = sa->nstored;
42911     while(idxbegin<idxend)
42912     {
42913         j = src->data.ptr.p_int[idxbegin];
42914         loc = sa->locationof.ptr.p_int[j];
42915         if( loc>=0 )
42916         {
42917             item = sa->items.ptr.p_int[ns-1];
42918             sa->items.ptr.p_int[loc] = item;
42919             sa->locationof.ptr.p_int[item] = loc;
42920             sa->locationof.ptr.p_int[j] = -1;
42921             ns = ns-1;
42922         }
42923         idxbegin = idxbegin+1;
42924     }
42925     sa->nstored = ns;
42926 }
42927 
42928 
42929 /*************************************************************************
42930 Clears set
42931 
42932 INPUT PARAMETERS
42933     SA          -   set to be cleared
42934 
42935 
42936   -- ALGLIB PROJECT --
42937      Copyright 05.10.2020 by Bochkanov Sergey.
42938 *************************************************************************/
amdordering_nsclear(amdnset * sa,ae_state * _state)42939 static void amdordering_nsclear(amdnset* sa, ae_state *_state)
42940 {
42941     ae_int_t i;
42942     ae_int_t ns;
42943 
42944 
42945     ns = sa->nstored;
42946     for(i=0; i<=ns-1; i++)
42947     {
42948         sa->locationof.ptr.p_int[sa->items.ptr.p_int[i]] = -1;
42949     }
42950     sa->nstored = 0;
42951 }
42952 
42953 
42954 /*************************************************************************
42955 Counts set elements
42956 
42957 INPUT PARAMETERS
42958     SA          -   set
42959 
42960 RESULT
42961     number of elements in SA
42962 
42963   -- ALGLIB PROJECT --
42964      Copyright 05.10.2020 by Bochkanov Sergey.
42965 *************************************************************************/
amdordering_nscount(amdnset * sa,ae_state * _state)42966 static ae_int_t amdordering_nscount(amdnset* sa, ae_state *_state)
42967 {
42968     ae_int_t result;
42969 
42970 
42971     result = sa->nstored;
42972     return result;
42973 }
42974 
42975 
42976 /*************************************************************************
42977 Counts set elements not present in the K-th set of the source structure
42978 
42979 INPUT PARAMETERS
42980     SA          -   set
42981     Src, K      -   source kn-set and set index K
42982 
42983 RESULT
42984     number of elements in SA not present in Src[K]
42985 
42986   -- ALGLIB PROJECT --
42987      Copyright 05.10.2020 by Bochkanov Sergey.
42988 *************************************************************************/
amdordering_nscountnotkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)42989 static ae_int_t amdordering_nscountnotkth(amdnset* sa,
42990      amdknset* src,
42991      ae_int_t k,
42992      ae_state *_state)
42993 {
42994     ae_int_t idxbegin;
42995     ae_int_t idxend;
42996     ae_int_t intersectcnt;
42997     ae_int_t result;
42998 
42999 
43000     idxbegin = src->vbegin.ptr.p_int[k];
43001     idxend = idxbegin+src->vcnt.ptr.p_int[k];
43002     intersectcnt = 0;
43003     while(idxbegin<idxend)
43004     {
43005         if( sa->locationof.ptr.p_int[src->data.ptr.p_int[idxbegin]]>=0 )
43006         {
43007             intersectcnt = intersectcnt+1;
43008         }
43009         idxbegin = idxbegin+1;
43010     }
43011     result = sa->nstored-intersectcnt;
43012     return result;
43013 }
43014 
43015 
43016 /*************************************************************************
43017 Counts set elements also present in the K-th set of the source structure
43018 
43019 INPUT PARAMETERS
43020     SA          -   set
43021     Src, K      -   source kn-set and set index K
43022 
43023 RESULT
43024     number of elements in SA also present in Src[K]
43025 
43026   -- ALGLIB PROJECT --
43027      Copyright 05.10.2020 by Bochkanov Sergey.
43028 *************************************************************************/
amdordering_nscountandkth(amdnset * sa,amdknset * src,ae_int_t k,ae_state * _state)43029 static ae_int_t amdordering_nscountandkth(amdnset* sa,
43030      amdknset* src,
43031      ae_int_t k,
43032      ae_state *_state)
43033 {
43034     ae_int_t idxbegin;
43035     ae_int_t idxend;
43036     ae_int_t result;
43037 
43038 
43039     idxbegin = src->vbegin.ptr.p_int[k];
43040     idxend = idxbegin+src->vcnt.ptr.p_int[k];
43041     result = 0;
43042     while(idxbegin<idxend)
43043     {
43044         if( sa->locationof.ptr.p_int[src->data.ptr.p_int[idxbegin]]>=0 )
43045         {
43046             result = result+1;
43047         }
43048         idxbegin = idxbegin+1;
43049     }
43050     return result;
43051 }
43052 
43053 
43054 /*************************************************************************
43055 Compare two sets, returns True for equal sets
43056 
43057 INPUT PARAMETERS
43058     S0          -   set 0
43059     S1          -   set 1, must have same parameter N as set 0
43060 
43061 RESULT
43062     True, if sets are equal
43063 
43064   -- ALGLIB PROJECT --
43065      Copyright 05.10.2020 by Bochkanov Sergey.
43066 *************************************************************************/
amdordering_nsequal(amdnset * s0,amdnset * s1,ae_state * _state)43067 static ae_bool amdordering_nsequal(amdnset* s0,
43068      amdnset* s1,
43069      ae_state *_state)
43070 {
43071     ae_int_t i;
43072     ae_int_t ns0;
43073     ae_int_t ns1;
43074     ae_bool result;
43075 
43076 
43077     result = ae_false;
43078     if( s0->n!=s1->n )
43079     {
43080         return result;
43081     }
43082     if( s0->nstored!=s1->nstored )
43083     {
43084         return result;
43085     }
43086     ns0 = s0->nstored;
43087     ns1 = s1->nstored;
43088     for(i=0; i<=ns0-1; i++)
43089     {
43090         if( s1->locationof.ptr.p_int[s0->items.ptr.p_int[i]]<0 )
43091         {
43092             return result;
43093         }
43094     }
43095     for(i=0; i<=ns1-1; i++)
43096     {
43097         if( s0->locationof.ptr.p_int[s1->items.ptr.p_int[i]]<0 )
43098         {
43099             return result;
43100         }
43101     }
43102     result = ae_true;
43103     return result;
43104 }
43105 
43106 
43107 /*************************************************************************
43108 Prepares iteration over set
43109 
43110 INPUT PARAMETERS
43111     SA          -   set
43112 
43113 OUTPUT PARAMETERS
43114     SA          -   SA ready for repeated calls of nsEnumerate()
43115 
43116   -- ALGLIB PROJECT --
43117      Copyright 05.10.2020 by Bochkanov Sergey.
43118 *************************************************************************/
amdordering_nsstartenumeration(amdnset * sa,ae_state * _state)43119 static void amdordering_nsstartenumeration(amdnset* sa, ae_state *_state)
43120 {
43121 
43122 
43123     sa->iteridx = 0;
43124 }
43125 
43126 
43127 /*************************************************************************
43128 Iterates over the set. Subsequent calls return True and set J to  new  set
43129 item until iteration stops and False is returned.
43130 
43131 INPUT PARAMETERS
43132     SA          -   n-set
43133 
43134 OUTPUT PARAMETERS
43135     J           -   if:
43136                     * Result=True - index of element in the set
43137                     * Result=False - not set
43138 
43139 
43140   -- ALGLIB PROJECT --
43141      Copyright 05.10.2020 by Bochkanov Sergey.
43142 *************************************************************************/
amdordering_nsenumerate(amdnset * sa,ae_int_t * i,ae_state * _state)43143 static ae_bool amdordering_nsenumerate(amdnset* sa,
43144      ae_int_t* i,
43145      ae_state *_state)
43146 {
43147     ae_int_t k;
43148     ae_bool result;
43149 
43150     *i = 0;
43151 
43152     k = sa->iteridx;
43153     if( k>=sa->nstored )
43154     {
43155         result = ae_false;
43156         return result;
43157     }
43158     *i = sa->items.ptr.p_int[k];
43159     sa->iteridx = k+1;
43160     result = ae_true;
43161     return result;
43162 }
43163 
43164 
43165 /*************************************************************************
43166 Compresses internal storage, reclaiming previously dropped blocks. To be
43167 used internally by kn-set modification functions.
43168 
43169 INPUT PARAMETERS
43170     SA          -   kn-set to compress
43171 
43172   -- ALGLIB PROJECT --
43173      Copyright 05.10.2020 by Bochkanov Sergey.
43174 *************************************************************************/
amdordering_knscompressstorage(amdknset * sa,ae_state * _state)43175 static void amdordering_knscompressstorage(amdknset* sa, ae_state *_state)
43176 {
43177     ae_int_t i;
43178     ae_int_t blocklen;
43179     ae_int_t setidx;
43180     ae_int_t srcoffs;
43181     ae_int_t dstoffs;
43182 
43183 
43184     srcoffs = 0;
43185     dstoffs = 0;
43186     while(srcoffs<sa->dataused)
43187     {
43188         blocklen = sa->data.ptr.p_int[srcoffs+0];
43189         setidx = sa->data.ptr.p_int[srcoffs+1];
43190         ae_assert(blocklen>=amdordering_knsheadersize, "knsCompressStorage: integrity check 6385 failed", _state);
43191         if( setidx<0 )
43192         {
43193             srcoffs = srcoffs+blocklen;
43194             continue;
43195         }
43196         if( srcoffs!=dstoffs )
43197         {
43198             for(i=0; i<=blocklen-1; i++)
43199             {
43200                 sa->data.ptr.p_int[dstoffs+i] = sa->data.ptr.p_int[srcoffs+i];
43201             }
43202             sa->vbegin.ptr.p_int[setidx] = dstoffs+amdordering_knsheadersize;
43203         }
43204         dstoffs = dstoffs+blocklen;
43205         srcoffs = srcoffs+blocklen;
43206     }
43207     ae_assert(srcoffs==sa->dataused, "knsCompressStorage: integrity check 9464 failed", _state);
43208     sa->dataused = dstoffs;
43209 }
43210 
43211 
43212 /*************************************************************************
43213 Reallocates internal storage for set #SetIdx, increasing its  capacity  to
43214 NewAllocated exactly. This function may invalidate internal  pointers  for
43215 ALL   sets  in  the  kn-set  structure  because  it  may  perform  storage
43216 compression in order to reclaim previously freed space.
43217 
43218 INPUT PARAMETERS
43219     SA          -   kn-set structure
43220     SetIdx      -   set to reallocate
43221     NewAllocated -  new size for the set, must be at least equal to already
43222                     allocated
43223 
43224   -- ALGLIB PROJECT --
43225      Copyright 05.10.2020 by Bochkanov Sergey.
43226 *************************************************************************/
amdordering_knsreallocate(amdknset * sa,ae_int_t setidx,ae_int_t newallocated,ae_state * _state)43227 static void amdordering_knsreallocate(amdknset* sa,
43228      ae_int_t setidx,
43229      ae_int_t newallocated,
43230      ae_state *_state)
43231 {
43232     ae_int_t oldbegin;
43233     ae_int_t oldcnt;
43234     ae_int_t newbegin;
43235     ae_int_t j;
43236 
43237 
43238     if( sa->data.cnt<sa->dataused+amdordering_knsheadersize+newallocated )
43239     {
43240         amdordering_knscompressstorage(sa, _state);
43241         if( sa->data.cnt<sa->dataused+amdordering_knsheadersize+newallocated )
43242         {
43243             ivectorgrowto(&sa->data, sa->dataused+amdordering_knsheadersize+newallocated, _state);
43244         }
43245     }
43246     oldbegin = sa->vbegin.ptr.p_int[setidx];
43247     oldcnt = sa->vcnt.ptr.p_int[setidx];
43248     newbegin = sa->dataused+amdordering_knsheadersize;
43249     sa->vbegin.ptr.p_int[setidx] = newbegin;
43250     sa->vallocated.ptr.p_int[setidx] = newallocated;
43251     sa->data.ptr.p_int[oldbegin-1] = -1;
43252     sa->data.ptr.p_int[newbegin-2] = amdordering_knsheadersize+newallocated;
43253     sa->data.ptr.p_int[newbegin-1] = setidx;
43254     sa->dataused = sa->dataused+sa->data.ptr.p_int[newbegin-2];
43255     for(j=0; j<=oldcnt-1; j++)
43256     {
43257         sa->data.ptr.p_int[newbegin+j] = sa->data.ptr.p_int[oldbegin+j];
43258     }
43259 }
43260 
43261 
43262 /*************************************************************************
43263 Initialize kn-set
43264 
43265 INPUT PARAMETERS
43266     K           -   sets count
43267     N           -   set size
43268     kPrealloc   -   preallocate place per set (can be zero)
43269 
43270 OUTPUT PARAMETERS
43271     SA          -   K sets of N elements, initially empty
43272 
43273   -- ALGLIB PROJECT --
43274      Copyright 05.10.2020 by Bochkanov Sergey.
43275 *************************************************************************/
amdordering_knsinit(ae_int_t k,ae_int_t n,ae_int_t kprealloc,amdknset * sa,ae_state * _state)43276 static void amdordering_knsinit(ae_int_t k,
43277      ae_int_t n,
43278      ae_int_t kprealloc,
43279      amdknset* sa,
43280      ae_state *_state)
43281 {
43282     ae_int_t i;
43283 
43284 
43285     sa->k = n;
43286     sa->n = n;
43287     isetallocv(n, -1, &sa->flagarray, _state);
43288     isetallocv(n, kprealloc, &sa->vallocated, _state);
43289     ivectorsetlengthatleast(&sa->vbegin, n, _state);
43290     sa->vbegin.ptr.p_int[0] = amdordering_knsheadersize;
43291     for(i=1; i<=n-1; i++)
43292     {
43293         sa->vbegin.ptr.p_int[i] = sa->vbegin.ptr.p_int[i-1]+sa->vallocated.ptr.p_int[i-1]+amdordering_knsheadersize;
43294     }
43295     sa->dataused = sa->vbegin.ptr.p_int[n-1]+sa->vallocated.ptr.p_int[n-1];
43296     ivectorsetlengthatleast(&sa->data, sa->dataused, _state);
43297     for(i=0; i<=n-1; i++)
43298     {
43299         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-2] = amdordering_knsheadersize+sa->vallocated.ptr.p_int[i];
43300         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-1] = i;
43301     }
43302     isetallocv(n, 0, &sa->vcnt, _state);
43303 }
43304 
43305 
43306 /*************************************************************************
43307 Initialize kn-set from lower triangle of symmetric A
43308 
43309 INPUT PARAMETERS
43310     A           -   lower triangular sparse matrix in CRS format
43311     N           -   problem size
43312 
43313 OUTPUT PARAMETERS
43314     SA          -   N sets of N elements, reproducing both lower and upper
43315                     triangles of A
43316 
43317   -- ALGLIB PROJECT --
43318      Copyright 05.10.2020 by Bochkanov Sergey.
43319 *************************************************************************/
amdordering_knsinitfroma(sparsematrix * a,ae_int_t n,amdknset * sa,ae_state * _state)43320 static void amdordering_knsinitfroma(sparsematrix* a,
43321      ae_int_t n,
43322      amdknset* sa,
43323      ae_state *_state)
43324 {
43325     ae_int_t i;
43326     ae_int_t j;
43327     ae_int_t jj;
43328     ae_int_t j0;
43329     ae_int_t j1;
43330 
43331 
43332     sa->k = n;
43333     sa->n = n;
43334     isetallocv(n, -1, &sa->flagarray, _state);
43335     ivectorsetlengthatleast(&sa->vallocated, n, _state);
43336     for(i=0; i<=n-1; i++)
43337     {
43338         ae_assert(a->didx.ptr.p_int[i]<a->uidx.ptr.p_int[i], "knsInitFromA: integrity check for diagonal of A failed", _state);
43339         j0 = a->ridx.ptr.p_int[i];
43340         j1 = a->didx.ptr.p_int[i]-1;
43341         sa->vallocated.ptr.p_int[i] = 1+(j1-j0+1);
43342         for(jj=j0; jj<=j1; jj++)
43343         {
43344             j = a->idx.ptr.p_int[jj];
43345             sa->vallocated.ptr.p_int[j] = sa->vallocated.ptr.p_int[j]+1;
43346         }
43347     }
43348     ivectorsetlengthatleast(&sa->vbegin, n, _state);
43349     sa->vbegin.ptr.p_int[0] = amdordering_knsheadersize;
43350     for(i=1; i<=n-1; i++)
43351     {
43352         sa->vbegin.ptr.p_int[i] = sa->vbegin.ptr.p_int[i-1]+sa->vallocated.ptr.p_int[i-1]+amdordering_knsheadersize;
43353     }
43354     sa->dataused = sa->vbegin.ptr.p_int[n-1]+sa->vallocated.ptr.p_int[n-1];
43355     ivectorsetlengthatleast(&sa->data, sa->dataused, _state);
43356     for(i=0; i<=n-1; i++)
43357     {
43358         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-2] = amdordering_knsheadersize+sa->vallocated.ptr.p_int[i];
43359         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]-1] = i;
43360     }
43361     isetallocv(n, 0, &sa->vcnt, _state);
43362     for(i=0; i<=n-1; i++)
43363     {
43364         sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]+sa->vcnt.ptr.p_int[i]] = i;
43365         sa->vcnt.ptr.p_int[i] = sa->vcnt.ptr.p_int[i]+1;
43366         j0 = a->ridx.ptr.p_int[i];
43367         j1 = a->didx.ptr.p_int[i]-1;
43368         for(jj=j0; jj<=j1; jj++)
43369         {
43370             j = a->idx.ptr.p_int[jj];
43371             sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]+sa->vcnt.ptr.p_int[i]] = j;
43372             sa->data.ptr.p_int[sa->vbegin.ptr.p_int[j]+sa->vcnt.ptr.p_int[j]] = i;
43373             sa->vcnt.ptr.p_int[i] = sa->vcnt.ptr.p_int[i]+1;
43374             sa->vcnt.ptr.p_int[j] = sa->vcnt.ptr.p_int[j]+1;
43375         }
43376     }
43377 }
43378 
43379 
43380 /*************************************************************************
43381 Prepares iteration over I-th set
43382 
43383 INPUT PARAMETERS
43384     SA          -   kn-set
43385     I           -   set index
43386 
43387 OUTPUT PARAMETERS
43388     SA          -   SA ready for repeated calls of knsEnumerate()
43389 
43390   -- ALGLIB PROJECT --
43391      Copyright 05.10.2020 by Bochkanov Sergey.
43392 *************************************************************************/
amdordering_knsstartenumeration(amdknset * sa,ae_int_t i,ae_state * _state)43393 static void amdordering_knsstartenumeration(amdknset* sa,
43394      ae_int_t i,
43395      ae_state *_state)
43396 {
43397 
43398 
43399     sa->iterrow = i;
43400     sa->iteridx = 0;
43401 }
43402 
43403 
43404 /*************************************************************************
43405 Iterates over I-th set (as specified during recent knsStartEnumeration call).
43406 Subsequent calls return True and set J to new set item until iteration
43407 stops and False is returned.
43408 
43409 INPUT PARAMETERS
43410     SA          -   kn-set
43411 
43412 OUTPUT PARAMETERS
43413     J           -   if:
43414                     * Result=True - index of element in the set
43415                     * Result=False - not set
43416 
43417 
43418   -- ALGLIB PROJECT --
43419      Copyright 05.10.2020 by Bochkanov Sergey.
43420 *************************************************************************/
amdordering_knsenumerate(amdknset * sa,ae_int_t * i,ae_state * _state)43421 static ae_bool amdordering_knsenumerate(amdknset* sa,
43422      ae_int_t* i,
43423      ae_state *_state)
43424 {
43425     ae_bool result;
43426 
43427     *i = 0;
43428 
43429     if( sa->iteridx<sa->vcnt.ptr.p_int[sa->iterrow] )
43430     {
43431         *i = sa->data.ptr.p_int[sa->vbegin.ptr.p_int[sa->iterrow]+sa->iteridx];
43432         sa->iteridx = sa->iteridx+1;
43433         result = ae_true;
43434     }
43435     else
43436     {
43437         result = ae_false;
43438     }
43439     return result;
43440 }
43441 
43442 
43443 /*************************************************************************
43444 Allows direct access to internal storage  of  kn-set  structure  - returns
43445 range of elements SA.Data[idxBegin...idxEnd-1] used to store K-th set
43446 
43447 INPUT PARAMETERS
43448     SA          -   kn-set
43449     K           -   set index
43450 
43451 OUTPUT PARAMETERS
43452     idxBegin,
43453     idxEnd      -   half-range [idxBegin,idxEnd) of SA.Data that stores
43454                     K-th set
43455 
43456 
43457   -- ALGLIB PROJECT --
43458      Copyright 05.10.2020 by Bochkanov Sergey.
43459 *************************************************************************/
amdordering_knsdirectaccess(amdknset * sa,ae_int_t k,ae_int_t * idxbegin,ae_int_t * idxend,ae_state * _state)43460 static void amdordering_knsdirectaccess(amdknset* sa,
43461      ae_int_t k,
43462      ae_int_t* idxbegin,
43463      ae_int_t* idxend,
43464      ae_state *_state)
43465 {
43466 
43467     *idxbegin = 0;
43468     *idxend = 0;
43469 
43470     *idxbegin = sa->vbegin.ptr.p_int[k];
43471     *idxend = *idxbegin+sa->vcnt.ptr.p_int[k];
43472 }
43473 
43474 
43475 /*************************************************************************
43476 Add K-th element to I-th set. The caller guarantees that  the  element  is
43477 not present in the target set.
43478 
43479 INPUT PARAMETERS
43480     SA          -   kn-set
43481     I           -   set index
43482     K           -   element to add
43483 
43484 OUTPUT PARAMETERS
43485     SA          -   modified SA
43486 
43487   -- ALGLIB PROJECT --
43488      Copyright 05.10.2020 by Bochkanov Sergey.
43489 *************************************************************************/
amdordering_knsaddnewelement(amdknset * sa,ae_int_t i,ae_int_t k,ae_state * _state)43490 static void amdordering_knsaddnewelement(amdknset* sa,
43491      ae_int_t i,
43492      ae_int_t k,
43493      ae_state *_state)
43494 {
43495     ae_int_t cnt;
43496 
43497 
43498     cnt = sa->vcnt.ptr.p_int[i];
43499     if( cnt==sa->vallocated.ptr.p_int[i] )
43500     {
43501         amdordering_knsreallocate(sa, i, 2*sa->vallocated.ptr.p_int[i]+1, _state);
43502     }
43503     sa->data.ptr.p_int[sa->vbegin.ptr.p_int[i]+cnt] = k;
43504     sa->vcnt.ptr.p_int[i] = cnt+1;
43505 }
43506 
43507 
43508 /*************************************************************************
43509 Subtracts source n-set from the I-th set of the destination kn-set.
43510 
43511 INPUT PARAMETERS
43512     SA          -   destination kn-set structure
43513     I           -   set index in the structure
43514     Src         -   source n-set
43515 
43516 OUTPUT PARAMETERS
43517     SA          -   I-th set except for elements in Src
43518 
43519   -- ALGLIB PROJECT --
43520      Copyright 05.10.2020 by Bochkanov Sergey.
43521 *************************************************************************/
amdordering_knssubtract1(amdknset * sa,ae_int_t i,amdnset * src,ae_state * _state)43522 static void amdordering_knssubtract1(amdknset* sa,
43523      ae_int_t i,
43524      amdnset* src,
43525      ae_state *_state)
43526 {
43527     ae_int_t j;
43528     ae_int_t idxbegin;
43529     ae_int_t idxend;
43530     ae_int_t cnt;
43531 
43532 
43533     cnt = sa->vcnt.ptr.p_int[i];
43534     idxbegin = sa->vbegin.ptr.p_int[i];
43535     idxend = idxbegin+cnt;
43536     while(idxbegin<idxend)
43537     {
43538         j = sa->data.ptr.p_int[idxbegin];
43539         if( src->locationof.ptr.p_int[j]>=0 )
43540         {
43541             sa->data.ptr.p_int[idxbegin] = sa->data.ptr.p_int[idxend-1];
43542             idxend = idxend-1;
43543             cnt = cnt-1;
43544         }
43545         else
43546         {
43547             idxbegin = idxbegin+1;
43548         }
43549     }
43550     sa->vcnt.ptr.p_int[i] = cnt;
43551 }
43552 
43553 
43554 /*************************************************************************
43555 Adds Kth set of the source kn-set to the I-th destination set. The  caller
43556 guarantees that SA[I] and Src[J] do NOT intersect, i.e. do not have shared
43557 elements - it allows to use faster algorithms.
43558 
43559 INPUT PARAMETERS
43560     SA          -   destination kn-set structure
43561     I           -   set index in the structure
43562     Src         -   source kn-set
43563     K           -   set index
43564 
43565 OUTPUT PARAMETERS
43566     SA          -   I-th set plus for elements in K-th set of Src
43567 
43568   -- ALGLIB PROJECT --
43569      Copyright 05.10.2020 by Bochkanov Sergey.
43570 *************************************************************************/
amdordering_knsaddkthdistinct(amdknset * sa,ae_int_t i,amdknset * src,ae_int_t k,ae_state * _state)43571 static void amdordering_knsaddkthdistinct(amdknset* sa,
43572      ae_int_t i,
43573      amdknset* src,
43574      ae_int_t k,
43575      ae_state *_state)
43576 {
43577     ae_int_t idxdst;
43578     ae_int_t idxsrcbegin;
43579     ae_int_t cnt;
43580     ae_int_t srccnt;
43581     ae_int_t j;
43582 
43583 
43584     cnt = sa->vcnt.ptr.p_int[i];
43585     srccnt = src->vcnt.ptr.p_int[k];
43586     if( cnt+srccnt>sa->vallocated.ptr.p_int[i] )
43587     {
43588         amdordering_knsreallocate(sa, i, 2*(cnt+srccnt)+1, _state);
43589     }
43590     idxsrcbegin = src->vbegin.ptr.p_int[k];
43591     idxdst = sa->vbegin.ptr.p_int[i]+cnt;
43592     for(j=0; j<=srccnt-1; j++)
43593     {
43594         sa->data.ptr.p_int[idxdst] = src->data.ptr.p_int[idxsrcbegin+j];
43595         idxdst = idxdst+1;
43596     }
43597     sa->vcnt.ptr.p_int[i] = cnt+srccnt;
43598 }
43599 
43600 
43601 /*************************************************************************
43602 Counts elements of K-th set of S0
43603 
43604 INPUT PARAMETERS
43605     S0          -   kn-set structure
43606     K           -   set index in the structure S0
43607 
43608 RESULT
43609     K-th set element count
43610 
43611   -- ALGLIB PROJECT --
43612      Copyright 05.10.2020 by Bochkanov Sergey.
43613 *************************************************************************/
amdordering_knscountkth(amdknset * s0,ae_int_t k,ae_state * _state)43614 static ae_int_t amdordering_knscountkth(amdknset* s0,
43615      ae_int_t k,
43616      ae_state *_state)
43617 {
43618     ae_int_t result;
43619 
43620 
43621     result = s0->vcnt.ptr.p_int[k];
43622     return result;
43623 }
43624 
43625 
43626 /*************************************************************************
43627 Counts elements of I-th set of S0 not present in S1
43628 
43629 INPUT PARAMETERS
43630     S0          -   kn-set structure
43631     I           -   set index in the structure S0
43632     S1          -   kn-set to compare against
43633 
43634 RESULT
43635     count
43636 
43637   -- ALGLIB PROJECT --
43638      Copyright 05.10.2020 by Bochkanov Sergey.
43639 *************************************************************************/
amdordering_knscountnot(amdknset * s0,ae_int_t i,amdnset * s1,ae_state * _state)43640 static ae_int_t amdordering_knscountnot(amdknset* s0,
43641      ae_int_t i,
43642      amdnset* s1,
43643      ae_state *_state)
43644 {
43645     ae_int_t idxbegin0;
43646     ae_int_t cnt0;
43647     ae_int_t j;
43648     ae_int_t result;
43649 
43650 
43651     cnt0 = s0->vcnt.ptr.p_int[i];
43652     idxbegin0 = s0->vbegin.ptr.p_int[i];
43653     result = 0;
43654     for(j=0; j<=cnt0-1; j++)
43655     {
43656         if( s1->locationof.ptr.p_int[s0->data.ptr.p_int[idxbegin0+j]]<0 )
43657         {
43658             result = result+1;
43659         }
43660     }
43661     return result;
43662 }
43663 
43664 
43665 /*************************************************************************
43666 Counts elements of I-th set of S0 not present in K-th set of S1
43667 
43668 INPUT PARAMETERS
43669     S0          -   kn-set structure
43670     I           -   set index in the structure S0
43671     S1          -   kn-set to compare against
43672     K           -   set index in the structure S1
43673 
43674 RESULT
43675     count
43676 
43677   -- ALGLIB PROJECT --
43678      Copyright 05.10.2020 by Bochkanov Sergey.
43679 *************************************************************************/
amdordering_knscountnotkth(amdknset * s0,ae_int_t i,amdknset * s1,ae_int_t k,ae_state * _state)43680 static ae_int_t amdordering_knscountnotkth(amdknset* s0,
43681      ae_int_t i,
43682      amdknset* s1,
43683      ae_int_t k,
43684      ae_state *_state)
43685 {
43686     ae_int_t idxbegin0;
43687     ae_int_t idxbegin1;
43688     ae_int_t cnt0;
43689     ae_int_t cnt1;
43690     ae_int_t j;
43691     ae_int_t result;
43692 
43693 
43694     cnt0 = s0->vcnt.ptr.p_int[i];
43695     cnt1 = s1->vcnt.ptr.p_int[k];
43696     idxbegin0 = s0->vbegin.ptr.p_int[i];
43697     idxbegin1 = s1->vbegin.ptr.p_int[k];
43698     for(j=0; j<=cnt1-1; j++)
43699     {
43700         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = 1;
43701     }
43702     result = 0;
43703     for(j=0; j<=cnt0-1; j++)
43704     {
43705         if( s0->flagarray.ptr.p_int[s0->data.ptr.p_int[idxbegin0+j]]<0 )
43706         {
43707             result = result+1;
43708         }
43709     }
43710     for(j=0; j<=cnt1-1; j++)
43711     {
43712         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = -1;
43713     }
43714     return result;
43715 }
43716 
43717 
43718 /*************************************************************************
43719 Counts elements of I-th set of S0 that are also present in K-th set of S1
43720 
43721 INPUT PARAMETERS
43722     S0          -   kn-set structure
43723     I           -   set index in the structure S0
43724     S1          -   kn-set to compare against
43725     K           -   set index in the structure S1
43726 
43727 RESULT
43728     count
43729 
43730   -- ALGLIB PROJECT --
43731      Copyright 05.10.2020 by Bochkanov Sergey.
43732 *************************************************************************/
amdordering_knscountandkth(amdknset * s0,ae_int_t i,amdknset * s1,ae_int_t k,ae_state * _state)43733 static ae_int_t amdordering_knscountandkth(amdknset* s0,
43734      ae_int_t i,
43735      amdknset* s1,
43736      ae_int_t k,
43737      ae_state *_state)
43738 {
43739     ae_int_t idxbegin0;
43740     ae_int_t idxbegin1;
43741     ae_int_t cnt0;
43742     ae_int_t cnt1;
43743     ae_int_t j;
43744     ae_int_t result;
43745 
43746 
43747     cnt0 = s0->vcnt.ptr.p_int[i];
43748     cnt1 = s1->vcnt.ptr.p_int[k];
43749     idxbegin0 = s0->vbegin.ptr.p_int[i];
43750     idxbegin1 = s1->vbegin.ptr.p_int[k];
43751     for(j=0; j<=cnt1-1; j++)
43752     {
43753         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = 1;
43754     }
43755     result = 0;
43756     for(j=0; j<=cnt0-1; j++)
43757     {
43758         if( s0->flagarray.ptr.p_int[s0->data.ptr.p_int[idxbegin0+j]]>0 )
43759         {
43760             result = result+1;
43761         }
43762     }
43763     for(j=0; j<=cnt1-1; j++)
43764     {
43765         s0->flagarray.ptr.p_int[s1->data.ptr.p_int[idxbegin1+j]] = -1;
43766     }
43767     return result;
43768 }
43769 
43770 
43771 /*************************************************************************
43772 Sums elements in I-th set of S0, returns sum.
43773 
43774 INPUT PARAMETERS
43775     S0          -   kn-set structure
43776     I           -   set index in the structure S0
43777 
43778 RESULT
43779     sum
43780 
43781   -- ALGLIB PROJECT --
43782      Copyright 05.10.2020 by Bochkanov Sergey.
43783 *************************************************************************/
amdordering_knssumkth(amdknset * s0,ae_int_t i,ae_state * _state)43784 static ae_int_t amdordering_knssumkth(amdknset* s0,
43785      ae_int_t i,
43786      ae_state *_state)
43787 {
43788     ae_int_t idxbegin0;
43789     ae_int_t cnt0;
43790     ae_int_t j;
43791     ae_int_t result;
43792 
43793 
43794     cnt0 = s0->vcnt.ptr.p_int[i];
43795     idxbegin0 = s0->vbegin.ptr.p_int[i];
43796     result = 0;
43797     for(j=0; j<=cnt0-1; j++)
43798     {
43799         result = result+s0->data.ptr.p_int[idxbegin0+j];
43800     }
43801     return result;
43802 }
43803 
43804 
43805 /*************************************************************************
43806 Clear k-th kn-set in collection.
43807 
43808 Freed memory is NOT reclaimed for future garbage collection.
43809 
43810 INPUT PARAMETERS
43811     SA          -   kn-set structure
43812     K           -   set index
43813 
43814 OUTPUT PARAMETERS
43815     SA          -   K-th set was cleared
43816 
43817   -- ALGLIB PROJECT --
43818      Copyright 05.10.2020 by Bochkanov Sergey.
43819 *************************************************************************/
amdordering_knsclearkthnoreclaim(amdknset * sa,ae_int_t k,ae_state * _state)43820 static void amdordering_knsclearkthnoreclaim(amdknset* sa,
43821      ae_int_t k,
43822      ae_state *_state)
43823 {
43824 
43825 
43826     sa->vcnt.ptr.p_int[k] = 0;
43827 }
43828 
43829 
43830 /*************************************************************************
43831 Clear k-th kn-set in collection.
43832 
43833 Freed memory is reclaimed for future garbage collection. This function  is
43834 NOT recommended if you intend to add elements to this set in some  future,
43835 because every addition will result in  reallocation  of  previously  freed
43836 memory. Use knsClearKthNoReclaim().
43837 
43838 INPUT PARAMETERS
43839     SA          -   kn-set structure
43840     K           -   set index
43841 
43842 OUTPUT PARAMETERS
43843     SA          -   K-th set was cleared
43844 
43845   -- ALGLIB PROJECT --
43846      Copyright 05.10.2020 by Bochkanov Sergey.
43847 *************************************************************************/
amdordering_knsclearkthreclaim(amdknset * sa,ae_int_t k,ae_state * _state)43848 static void amdordering_knsclearkthreclaim(amdknset* sa,
43849      ae_int_t k,
43850      ae_state *_state)
43851 {
43852     ae_int_t idxbegin;
43853     ae_int_t allocated;
43854 
43855 
43856     idxbegin = sa->vbegin.ptr.p_int[k];
43857     allocated = sa->vallocated.ptr.p_int[k];
43858     sa->vcnt.ptr.p_int[k] = 0;
43859     if( allocated>=amdordering_knsheadersize )
43860     {
43861         sa->data.ptr.p_int[idxbegin-2] = 2;
43862         sa->data.ptr.p_int[idxbegin+0] = allocated;
43863         sa->data.ptr.p_int[idxbegin+1] = -1;
43864         sa->vallocated.ptr.p_int[k] = 0;
43865     }
43866 }
43867 
43868 
43869 /*************************************************************************
43870 Initialize linked list matrix
43871 
43872 INPUT PARAMETERS
43873     N           -   matrix size
43874 
43875 OUTPUT PARAMETERS
43876     A           -   NxN linked list matrix
43877 
43878   -- ALGLIB PROJECT --
43879      Copyright 05.10.2020 by Bochkanov Sergey.
43880 *************************************************************************/
amdordering_mtxinit(ae_int_t n,amdllmatrix * a,ae_state * _state)43881 static void amdordering_mtxinit(ae_int_t n,
43882      amdllmatrix* a,
43883      ae_state *_state)
43884 {
43885 
43886 
43887     a->n = n;
43888     isetallocv(2*n+1, -1, &a->vbegin, _state);
43889     isetallocv(n, 0, &a->vcolcnt, _state);
43890     a->entriesinitialized = 0;
43891 }
43892 
43893 
43894 /*************************************************************************
43895 Adds column from matrix to n-set
43896 
43897 INPUT PARAMETERS
43898     A           -   NxN linked list matrix
43899     J           -   column index to add
43900     S           -   target n-set
43901 
43902 OUTPUT PARAMETERS
43903     S           -   elements from J-th column are added to S
43904 
43905 
43906   -- ALGLIB PROJECT --
43907      Copyright 05.10.2020 by Bochkanov Sergey.
43908 *************************************************************************/
amdordering_mtxaddcolumnto(amdllmatrix * a,ae_int_t j,amdnset * s,ae_state * _state)43909 static void amdordering_mtxaddcolumnto(amdllmatrix* a,
43910      ae_int_t j,
43911      amdnset* s,
43912      ae_state *_state)
43913 {
43914     ae_int_t n;
43915     ae_int_t eidx;
43916 
43917 
43918     n = a->n;
43919     eidx = a->vbegin.ptr.p_int[n+j];
43920     while(eidx>=0)
43921     {
43922         amdordering_nsaddelement(s, a->entries.ptr.p_int[eidx*amdordering_llmentrysize+4], _state);
43923         eidx = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
43924     }
43925 }
43926 
43927 
43928 /*************************************************************************
43929 Inserts new element into column J, row I. The caller guarantees  that  the
43930 element being inserted is NOT already present in the matrix.
43931 
43932 INPUT PARAMETERS
43933     A           -   NxN linked list matrix
43934     I           -   row index
43935     J           -   column index
43936 
43937 OUTPUT PARAMETERS
43938     A           -   element (I,J) added to the list.
43939 
43940 
43941   -- ALGLIB PROJECT --
43942      Copyright 05.10.2020 by Bochkanov Sergey.
43943 *************************************************************************/
static void amdordering_mtxinsertnewelement(amdllmatrix* a,
     ae_int_t i,
     ae_int_t j,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t k;
    ae_int_t newsize;
    ae_int_t eidx;
    ae_int_t offs;


    n = a->n;
    /* VBegin[2*N] heads the free-entry list; -1 means the pool is exhausted.
     * In that case grow the entry pool (roughly doubling) and thread every
     * newly created entry onto the free list through slot +0. */
    if( a->vbegin.ptr.p_int[2*n]<0 )
    {
        newsize = 2*a->entriesinitialized+1;
        ivectorresize(&a->entries, newsize*amdordering_llmentrysize, _state);
        for(k=a->entriesinitialized; k<=newsize-2; k++)
        {
            a->entries.ptr.p_int[k*amdordering_llmentrysize+0] = k+1;
        }
        a->entries.ptr.p_int[(newsize-1)*amdordering_llmentrysize+0] = a->vbegin.ptr.p_int[2*n];
        a->vbegin.ptr.p_int[2*n] = a->entriesinitialized;
        a->entriesinitialized = newsize;
    }
    /* Pop one entry off the free list. */
    eidx = a->vbegin.ptr.p_int[2*n];
    offs = eidx*amdordering_llmentrysize;
    a->vbegin.ptr.p_int[2*n] = a->entries.ptr.p_int[offs+0];
    /* Link the entry at the head of row I's list; slots +0/+1 are the
     * prev/next links within a row, and row heads live in VBegin[0..N). */
    a->entries.ptr.p_int[offs+0] = -1;
    a->entries.ptr.p_int[offs+1] = a->vbegin.ptr.p_int[i];
    if( a->vbegin.ptr.p_int[i]>=0 )
    {
        a->entries.ptr.p_int[a->vbegin.ptr.p_int[i]*amdordering_llmentrysize+0] = eidx;
    }
    /* Link the entry at the head of column J's list; slots +2/+3 are the
     * prev/next links within a column, and column heads live at VBegin[N+J]. */
    a->entries.ptr.p_int[offs+2] = -1;
    a->entries.ptr.p_int[offs+3] = a->vbegin.ptr.p_int[j+n];
    if( a->vbegin.ptr.p_int[j+n]>=0 )
    {
        a->entries.ptr.p_int[a->vbegin.ptr.p_int[j+n]*amdordering_llmentrysize+2] = eidx;
    }
    /* Store the element's coordinates (+4 row, +5 column), publish the new
     * list heads, and bump the per-column element counter. */
    a->entries.ptr.p_int[offs+4] = i;
    a->entries.ptr.p_int[offs+5] = j;
    a->vbegin.ptr.p_int[i] = eidx;
    a->vbegin.ptr.p_int[j+n] = eidx;
    a->vcolcnt.ptr.p_int[j] = a->vcolcnt.ptr.p_int[j]+1;
}
43990 
43991 
43992 /*************************************************************************
43993 Counts elements in J-th column that are not present in n-set S
43994 
43995 INPUT PARAMETERS
43996     A           -   NxN linked list matrix
43997     J           -   column index
43998     S           -   n-set to compare against
43999 
44000 RESULT
44001     element count
44002 
44003 
44004   -- ALGLIB PROJECT --
44005      Copyright 05.10.2020 by Bochkanov Sergey.
44006 *************************************************************************/
amdordering_mtxcountcolumnnot(amdllmatrix * a,ae_int_t j,amdnset * s,ae_state * _state)44007 static ae_int_t amdordering_mtxcountcolumnnot(amdllmatrix* a,
44008      ae_int_t j,
44009      amdnset* s,
44010      ae_state *_state)
44011 {
44012     ae_int_t n;
44013     ae_int_t eidx;
44014     ae_int_t result;
44015 
44016 
44017     n = a->n;
44018     result = 0;
44019     eidx = a->vbegin.ptr.p_int[n+j];
44020     while(eidx>=0)
44021     {
44022         if( s->locationof.ptr.p_int[a->entries.ptr.p_int[eidx*amdordering_llmentrysize+4]]<0 )
44023         {
44024             result = result+1;
44025         }
44026         eidx = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
44027     }
44028     return result;
44029 }
44030 
44031 
44032 /*************************************************************************
44033 Counts elements in J-th column
44034 
44035 INPUT PARAMETERS
44036     A           -   NxN linked list matrix
44037     J           -   column index
44038 
44039 RESULT
44040     element count
44041 
44042 
44043   -- ALGLIB PROJECT --
44044      Copyright 05.10.2020 by Bochkanov Sergey.
44045 *************************************************************************/
amdordering_mtxcountcolumn(amdllmatrix * a,ae_int_t j,ae_state * _state)44046 static ae_int_t amdordering_mtxcountcolumn(amdllmatrix* a,
44047      ae_int_t j,
44048      ae_state *_state)
44049 {
44050     ae_int_t result;
44051 
44052 
44053     result = a->vcolcnt.ptr.p_int[j];
44054     return result;
44055 }
44056 
44057 
44058 /*************************************************************************
44059 Clears K-th column or row
44060 
44061 INPUT PARAMETERS
44062     A           -   NxN linked list matrix
44063     K           -   column/row index to clear
44064     IsCol       -   whether we want to clear row or column
44065 
44066 OUTPUT PARAMETERS
44067     A           -   K-th column or row is empty
44068 
44069 
44070   -- ALGLIB PROJECT --
44071      Copyright 05.10.2020 by Bochkanov Sergey.
44072 *************************************************************************/
static void amdordering_mtxclearx(amdllmatrix* a,
     ae_int_t k,
     ae_bool iscol,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t eidx;
    ae_int_t enext;
    ae_int_t idxprev;
    ae_int_t idxnext;
    ae_int_t idxr;
    ae_int_t idxc;


    n = a->n;
    /* Pick the head of the list to clear: row heads live in VBegin[0..N),
     * column heads in VBegin[N..2N). */
    if( iscol )
    {
        eidx = a->vbegin.ptr.p_int[n+k];
    }
    else
    {
        eidx = a->vbegin.ptr.p_int[k];
    }
    while(eidx>=0)
    {
        /* Entry layout: +0/+1 prev/next within the row, +2/+3 prev/next
         * within the column, +4 row index, +5 column index. */
        idxr = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+4];
        idxc = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+5];
        /* Save the successor along the list being cleared BEFORE the entry
         * is unlinked and recycled. */
        if( iscol )
        {
            enext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
        }
        else
        {
            enext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+1];
        }
        /* Unlink the entry from its row list (update the row head when the
         * entry was the first element). */
        idxprev = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+0];
        idxnext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+1];
        if( idxprev>=0 )
        {
            a->entries.ptr.p_int[idxprev*amdordering_llmentrysize+1] = idxnext;
        }
        else
        {
            a->vbegin.ptr.p_int[idxr] = idxnext;
        }
        if( idxnext>=0 )
        {
            a->entries.ptr.p_int[idxnext*amdordering_llmentrysize+0] = idxprev;
        }
        /* Unlink the entry from its column list in the same fashion. */
        idxprev = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+2];
        idxnext = a->entries.ptr.p_int[eidx*amdordering_llmentrysize+3];
        if( idxprev>=0 )
        {
            a->entries.ptr.p_int[idxprev*amdordering_llmentrysize+3] = idxnext;
        }
        else
        {
            a->vbegin.ptr.p_int[idxc+n] = idxnext;
        }
        if( idxnext>=0 )
        {
            a->entries.ptr.p_int[idxnext*amdordering_llmentrysize+2] = idxprev;
        }
        /* Push the entry onto the free list headed at VBegin[2*N]. */
        a->entries.ptr.p_int[eidx*amdordering_llmentrysize+0] = a->vbegin.ptr.p_int[2*n];
        a->vbegin.ptr.p_int[2*n] = eidx;
        eidx = enext;
        /* Clearing a row removes one element from many different columns,
         * so their counters are decremented one by one. */
        if( !iscol )
        {
            a->vcolcnt.ptr.p_int[idxc] = a->vcolcnt.ptr.p_int[idxc]-1;
        }
    }
    /* Clearing a column empties exactly one counter. */
    if( iscol )
    {
        a->vcolcnt.ptr.p_int[k] = 0;
    }
}
44149 
44150 
44151 /*************************************************************************
44152 Clears J-th column
44153 
44154 INPUT PARAMETERS
44155     A           -   NxN linked list matrix
44156     J           -   column index to clear
44157 
44158 OUTPUT PARAMETERS
44159     A           -   J-th column is empty
44160 
44161 
44162   -- ALGLIB PROJECT --
44163      Copyright 05.10.2020 by Bochkanov Sergey.
44164 *************************************************************************/
amdordering_mtxclearcolumn(amdllmatrix * a,ae_int_t j,ae_state * _state)44165 static void amdordering_mtxclearcolumn(amdllmatrix* a,
44166      ae_int_t j,
44167      ae_state *_state)
44168 {
44169 
44170 
44171     amdordering_mtxclearx(a, j, ae_true, _state);
44172 }
44173 
44174 
44175 /*************************************************************************
44176 Clears J-th row
44177 
44178 INPUT PARAMETERS
44179     A           -   NxN linked list matrix
44180     J           -   row index to clear
44181 
44182 OUTPUT PARAMETERS
44183     A           -   J-th row is empty
44184 
44185 
44186   -- ALGLIB PROJECT --
44187      Copyright 05.10.2020 by Bochkanov Sergey.
44188 *************************************************************************/
amdordering_mtxclearrow(amdllmatrix * a,ae_int_t j,ae_state * _state)44189 static void amdordering_mtxclearrow(amdllmatrix* a,
44190      ae_int_t j,
44191      ae_state *_state)
44192 {
44193 
44194 
44195     amdordering_mtxclearx(a, j, ae_false, _state);
44196 }
44197 
44198 
44199 /*************************************************************************
44200 Initialize vertex storage using A to estimate initial degrees
44201 
44202 INPUT PARAMETERS
44203     A           -   NxN lower triangular sparse CRS matrix
44204     N           -   problem size
44205     CheckExactDegrees-
                    whether we want to maintain additional exact degrees
44207                     (the search is still done using approximate ones)
44208 
44209 OUTPUT PARAMETERS
44210     S           -   vertex set
44211 
44212 
44213   -- ALGLIB PROJECT --
44214      Copyright 05.10.2020 by Bochkanov Sergey.
44215 *************************************************************************/
amdordering_vtxinit(sparsematrix * a,ae_int_t n,ae_bool checkexactdegrees,amdvertexset * s,ae_state * _state)44216 static void amdordering_vtxinit(sparsematrix* a,
44217      ae_int_t n,
44218      ae_bool checkexactdegrees,
44219      amdvertexset* s,
44220      ae_state *_state)
44221 {
44222     ae_int_t i;
44223     ae_int_t j;
44224     ae_int_t jj;
44225     ae_int_t j0;
44226     ae_int_t j1;
44227 
44228     _amdvertexset_clear(s);
44229 
44230     s->n = n;
44231     s->checkexactdegrees = checkexactdegrees;
44232     s->smallestdegree = 0;
44233     bsetallocv(n, ae_true, &s->isvertex, _state);
44234     isetallocv(n, 0, &s->approxd, _state);
44235     for(i=0; i<=n-1; i++)
44236     {
44237         j0 = a->ridx.ptr.p_int[i];
44238         j1 = a->didx.ptr.p_int[i]-1;
44239         s->approxd.ptr.p_int[i] = j1-j0+1;
44240         for(jj=j0; jj<=j1; jj++)
44241         {
44242             j = a->idx.ptr.p_int[jj];
44243             s->approxd.ptr.p_int[j] = s->approxd.ptr.p_int[j]+1;
44244         }
44245     }
44246     if( checkexactdegrees )
44247     {
44248         icopyallocv(n, &s->approxd, &s->optionalexactd, _state);
44249     }
44250     isetallocv(n, -1, &s->vbegin, _state);
44251     isetallocv(n, -1, &s->vprev, _state);
44252     isetallocv(n, -1, &s->vnext, _state);
44253     for(i=0; i<=n-1; i++)
44254     {
44255         j = s->approxd.ptr.p_int[i];
44256         j0 = s->vbegin.ptr.p_int[j];
44257         s->vbegin.ptr.p_int[j] = i;
44258         s->vnext.ptr.p_int[i] = j0;
44259         s->vprev.ptr.p_int[i] = -1;
44260         if( j0>=0 )
44261         {
44262             s->vprev.ptr.p_int[j0] = i;
44263         }
44264     }
44265 }
44266 
44267 
44268 /*************************************************************************
44269 Removes vertex from the storage
44270 
44271 INPUT PARAMETERS
44272     S           -   vertex set
44273     P           -   vertex to be removed
44274 
44275 OUTPUT PARAMETERS
44276     S           -   modified
44277 
44278 
44279   -- ALGLIB PROJECT --
44280      Copyright 05.10.2020 by Bochkanov Sergey.
44281 *************************************************************************/
amdordering_vtxremovevertex(amdvertexset * s,ae_int_t p,ae_state * _state)44282 static void amdordering_vtxremovevertex(amdvertexset* s,
44283      ae_int_t p,
44284      ae_state *_state)
44285 {
44286     ae_int_t d;
44287     ae_int_t idxprev;
44288     ae_int_t idxnext;
44289 
44290 
44291     d = s->approxd.ptr.p_int[p];
44292     idxprev = s->vprev.ptr.p_int[p];
44293     idxnext = s->vnext.ptr.p_int[p];
44294     if( idxprev>=0 )
44295     {
44296         s->vnext.ptr.p_int[idxprev] = idxnext;
44297     }
44298     else
44299     {
44300         s->vbegin.ptr.p_int[d] = idxnext;
44301     }
44302     if( idxnext>=0 )
44303     {
44304         s->vprev.ptr.p_int[idxnext] = idxprev;
44305     }
44306     s->isvertex.ptr.p_bool[p] = ae_false;
44307     s->approxd.ptr.p_int[p] = -9999999;
44308     if( s->checkexactdegrees )
44309     {
44310         s->optionalexactd.ptr.p_int[p] = -9999999;
44311     }
44312 }
44313 
44314 
44315 /*************************************************************************
44316 Get approximate degree. Result is undefined for removed vertexes.
44317 
44318 INPUT PARAMETERS
44319     S           -   vertex set
44320     P           -   vertex index
44321 
44322 RESULT
44323     vertex degree
44324 
44325 
44326   -- ALGLIB PROJECT --
44327      Copyright 05.10.2020 by Bochkanov Sergey.
44328 *************************************************************************/
amdordering_vtxgetapprox(amdvertexset * s,ae_int_t p,ae_state * _state)44329 static ae_int_t amdordering_vtxgetapprox(amdvertexset* s,
44330      ae_int_t p,
44331      ae_state *_state)
44332 {
44333     ae_int_t result;
44334 
44335 
44336     result = s->approxd.ptr.p_int[p];
44337     return result;
44338 }
44339 
44340 
44341 /*************************************************************************
44342 Get exact degree (or 0, if not supported).  Result is undefined for
44343 removed vertexes.
44344 
44345 INPUT PARAMETERS
44346     S           -   vertex set
44347     P           -   vertex index
44348 
44349 RESULT
44350     vertex degree
44351 
44352 
44353   -- ALGLIB PROJECT --
44354      Copyright 05.10.2020 by Bochkanov Sergey.
44355 *************************************************************************/
amdordering_vtxgetexact(amdvertexset * s,ae_int_t p,ae_state * _state)44356 static ae_int_t amdordering_vtxgetexact(amdvertexset* s,
44357      ae_int_t p,
44358      ae_state *_state)
44359 {
44360     ae_int_t result;
44361 
44362 
44363     if( s->checkexactdegrees )
44364     {
44365         result = s->optionalexactd.ptr.p_int[p];
44366     }
44367     else
44368     {
44369         result = 0;
44370     }
44371     return result;
44372 }
44373 
44374 
44375 /*************************************************************************
44376 Returns index of vertex with minimum approximate degree, or -1 when there
44377 is no vertex.
44378 
44379 INPUT PARAMETERS
44380     S           -   vertex set
44381 
44382 RESULT
44383     vertex index, or -1
44384 
44385 
44386   -- ALGLIB PROJECT --
44387      Copyright 05.10.2020 by Bochkanov Sergey.
44388 *************************************************************************/
amdordering_vtxgetapproxmindegree(amdvertexset * s,ae_state * _state)44389 static ae_int_t amdordering_vtxgetapproxmindegree(amdvertexset* s,
44390      ae_state *_state)
44391 {
44392     ae_int_t i;
44393     ae_int_t n;
44394     ae_int_t result;
44395 
44396 
44397     n = s->n;
44398     result = -1;
44399     for(i=s->smallestdegree; i<=n-1; i++)
44400     {
44401         if( s->vbegin.ptr.p_int[i]>=0 )
44402         {
44403             s->smallestdegree = i;
44404             result = s->vbegin.ptr.p_int[i];
44405             return result;
44406         }
44407     }
44408     return result;
44409 }
44410 
44411 
44412 /*************************************************************************
44413 Update approximate degree
44414 
44415 INPUT PARAMETERS
44416     S           -   vertex set
44417     P           -   vertex to be updated
44418     DNew        -   new degree
44419 
44420 OUTPUT PARAMETERS
44421     S           -   modified
44422 
44423 
44424   -- ALGLIB PROJECT --
44425      Copyright 05.10.2020 by Bochkanov Sergey.
44426 *************************************************************************/
amdordering_vtxupdateapproximatedegree(amdvertexset * s,ae_int_t p,ae_int_t dnew,ae_state * _state)44427 static void amdordering_vtxupdateapproximatedegree(amdvertexset* s,
44428      ae_int_t p,
44429      ae_int_t dnew,
44430      ae_state *_state)
44431 {
44432     ae_int_t dold;
44433     ae_int_t idxprev;
44434     ae_int_t idxnext;
44435     ae_int_t oldbegin;
44436 
44437 
44438     dold = s->approxd.ptr.p_int[p];
44439     if( dold==dnew )
44440     {
44441         return;
44442     }
44443     idxprev = s->vprev.ptr.p_int[p];
44444     idxnext = s->vnext.ptr.p_int[p];
44445     if( idxprev>=0 )
44446     {
44447         s->vnext.ptr.p_int[idxprev] = idxnext;
44448     }
44449     else
44450     {
44451         s->vbegin.ptr.p_int[dold] = idxnext;
44452     }
44453     if( idxnext>=0 )
44454     {
44455         s->vprev.ptr.p_int[idxnext] = idxprev;
44456     }
44457     oldbegin = s->vbegin.ptr.p_int[dnew];
44458     s->vbegin.ptr.p_int[dnew] = p;
44459     s->vnext.ptr.p_int[p] = oldbegin;
44460     s->vprev.ptr.p_int[p] = -1;
44461     if( oldbegin>=0 )
44462     {
44463         s->vprev.ptr.p_int[oldbegin] = p;
44464     }
44465     s->approxd.ptr.p_int[p] = dnew;
44466     if( dnew<s->smallestdegree )
44467     {
44468         s->smallestdegree = dnew;
44469     }
44470 }
44471 
44472 
44473 /*************************************************************************
44474 Update optional exact degree. Silently returns if vertex set does not store
44475 exact degrees.
44476 
44477 INPUT PARAMETERS
44478     S           -   vertex set
44479     P           -   vertex to be updated
44480     D           -   new degree
44481 
44482 OUTPUT PARAMETERS
44483     S           -   modified
44484 
44485 
44486   -- ALGLIB PROJECT --
44487      Copyright 05.10.2020 by Bochkanov Sergey.
44488 *************************************************************************/
amdordering_vtxupdateexactdegree(amdvertexset * s,ae_int_t p,ae_int_t d,ae_state * _state)44489 static void amdordering_vtxupdateexactdegree(amdvertexset* s,
44490      ae_int_t p,
44491      ae_int_t d,
44492      ae_state *_state)
44493 {
44494 
44495 
44496     if( !s->checkexactdegrees )
44497     {
44498         return;
44499     }
44500     s->optionalexactd.ptr.p_int[p] = d;
44501 }
44502 
44503 
44504 /*************************************************************************
44505 This function selects K-th  pivot  with  minimum  approximate  degree  and
44506 generates permutation that reorders variable to the K-th position  in  the
44507 matrix.
44508 
44509 Due to supernodal structure of the matrix more than one pivot variable can
44510 be selected and moved to the beginning. The actual count of pivots selected
44511 is returned in NodeSize.
44512 
44513 INPUT PARAMETERS
44514     Buf         -   properly initialized buffer object
44515     K           -   pivot index
44516 
44517 OUTPUT PARAMETERS
44518     Buf.Perm    -   entries [K,K+NodeSize) are initialized by permutation
44519     Buf.InvPerm -   entries [K,K+NodeSize) are initialized by permutation
44520     Buf.ColumnSwaps-entries [K,K+NodeSize) are initialized by permutation
44521     P           -   pivot supervariable
44522     NodeSize    -   supernode size
44523 
44524   -- ALGLIB PROJECT --
44525      Copyright 05.10.2020 by Bochkanov Sergey.
44526 *************************************************************************/
static void amdordering_amdselectpivotelement(amdbuffer* buf,
     ae_int_t k,
     ae_int_t* p,
     ae_int_t* nodesize,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;

    *p = 0;
    *nodesize = 0;

    /* Pick the pivot: a vertex with minimum approximate degree. */
    *p = amdordering_vtxgetapproxmindegree(&buf->vertexdegrees, _state);
    ae_assert(*p>=0, "GenerateAMDPermutation: integrity check 3634 failed", _state);
    ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, *p, _state)>=0, "integrity check RDFD2 failed", _state);
    /* Move every variable J of the pivot supervariable SetSuper[P] into
     * positions K, K+1, ... by swapping it with whatever currently occupies
     * that position.  Perm/InvPerm appear to be mutually inverse maps
     * (variable->position and position->variable, respectively - confirm
     * against their initialization); each swap updates both maps so they
     * stay consistent and records the displaced position in ColumnSwaps. */
    *nodesize = 0;
    amdordering_knsstartenumeration(&buf->setsuper, *p, _state);
    while(amdordering_knsenumerate(&buf->setsuper, &j, _state))
    {
        i = buf->perm.ptr.p_int[j];
        buf->columnswaps.ptr.p_int[k+(*nodesize)] = i;
        buf->invperm.ptr.p_int[i] = buf->invperm.ptr.p_int[k+(*nodesize)];
        buf->invperm.ptr.p_int[k+(*nodesize)] = j;
        buf->perm.ptr.p_int[buf->invperm.ptr.p_int[i]] = i;
        buf->perm.ptr.p_int[buf->invperm.ptr.p_int[k+(*nodesize)]] = k+(*nodesize);
        inc(nodesize, _state);
    }
    ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, *p, _state)>=0&&(!buf->checkexactdegrees||amdordering_vtxgetexact(&buf->vertexdegrees, *p, _state)>=0), "AMD: integrity check RDFD failed", _state);
}
44556 
44557 
44558 /*************************************************************************
44559 This function computes nonzero pattern of Lp, the column that is added  to
44560 the lower triangular Cholesky factor.
44561 
44562 INPUT PARAMETERS
44563     Buf         -   properly initialized buffer object
44564     P           -   pivot column
44565 
44566 OUTPUT PARAMETERS
44567     Buf.setP    -   initialized with setSuper[P]
44568     Buf.Lp      -   initialized with Lp\P
44569     Buf.setRp   -   initialized with Lp\{P+Q}
44570     Buf.Ep      -   initialized with setE[P]
44571     Buf.mtxL    -   L := L+Lp
44572     Buf.Ls      -   first Buf.LSCnt elements contain subset of Lp elements
44573                     that are principal nodes in supervariables.
44574 
44575   -- ALGLIB PROJECT --
44576      Copyright 05.10.2020 by Bochkanov Sergey.
44577 *************************************************************************/
static void amdordering_amdcomputelp(amdbuffer* buf,
     ae_int_t p,
     ae_state *_state)
{
    ae_int_t i;


    /* setP := setSuper[P], the variables forming the pivot supernode. */
    amdordering_nsclear(&buf->setp, _state);
    amdordering_nsaddkth(&buf->setp, &buf->setsuper, p, _state);
    /* Lp := setA[P] united with the L-columns of all elements in setE[P]. */
    amdordering_nsclear(&buf->lp, _state);
    amdordering_nsaddkth(&buf->lp, &buf->seta, p, _state);
    amdordering_knsstartenumeration(&buf->sete, p, _state);
    while(amdordering_knsenumerate(&buf->sete, &i, _state))
    {
        amdordering_mtxaddcolumnto(&buf->mtxl, i, &buf->lp, _state);
    }
    /* Lp := Lp \ setSuper[P]; setRp := Lp \ setQ. */
    amdordering_nssubtractkth(&buf->lp, &buf->setsuper, p, _state);
    amdordering_nscopy(&buf->lp, &buf->setrp, _state);
    amdordering_nssubtract1(&buf->setrp, &buf->setq, _state);
    /* Insert column P of the pattern into L; along the way collect into
     * Ls (first LSCnt entries) the members of Lp flagged as principal
     * nodes of supervariables. */
    buf->lscnt = 0;
    amdordering_nsstartenumeration(&buf->lp, _state);
    while(amdordering_nsenumerate(&buf->lp, &i, _state))
    {
        ae_assert(!buf->iseliminated.ptr.p_bool[i], "AMD: integrity check 0740 failed", _state);
        amdordering_mtxinsertnewelement(&buf->mtxl, i, p, _state);
        if( buf->issupernode.ptr.p_bool[i] )
        {
            buf->ls.ptr.p_int[buf->lscnt] = i;
            buf->lscnt = buf->lscnt+1;
        }
    }
    /* Ep := setE[P]. */
    amdordering_nsclear(&buf->ep, _state);
    amdordering_nsaddkth(&buf->ep, &buf->sete, p, _state);
}
44612 
44613 
44614 /*************************************************************************
44615 Having output of AMDComputeLp() in the Buf object, this function  performs
44616 mass elimination in the quotient graph.
44617 
44618 INPUT PARAMETERS
44619     Buf         -   properly initialized buffer object
44620     P           -   pivot column
44621     K           -   number of already eliminated columns (P-th is not counted)
44622     Tau         -   variables with degrees higher than Tau will be classified
44623                     as quasidense
44624 
44625 OUTPUT PARAMETERS
44626     Buf.setA    -   Lp is eliminated from setA
44627     Buf.setE    -   Ep is eliminated from setE, P is added
44628     approxD     -   updated
44629     Buf.setQSuperCand-   contains candidates for quasidense status assignment
44630 
44631   -- ALGLIB PROJECT --
44632      Copyright 05.10.2020 by Bochkanov Sergey.
44633 *************************************************************************/
amdordering_amdmasselimination(amdbuffer * buf,ae_int_t p,ae_int_t k,ae_int_t tau,ae_state * _state)44634 static void amdordering_amdmasselimination(amdbuffer* buf,
44635      ae_int_t p,
44636      ae_int_t k,
44637      ae_int_t tau,
44638      ae_state *_state)
44639 {
44640     ae_int_t n;
44641     ae_int_t lidx;
44642     ae_int_t lpi;
44643     ae_int_t cntsuperi;
44644     ae_int_t cntq;
44645     ae_int_t cntainoti;
44646     ae_int_t cntainotqi;
44647     ae_int_t cntlpnoti;
44648     ae_int_t cntlpnotqi;
44649     ae_int_t cc;
44650     ae_int_t j;
44651     ae_int_t e;
44652     ae_int_t we;
44653     ae_int_t cnttoclean;
44654     ae_int_t idxbegin;
44655     ae_int_t idxend;
44656     ae_int_t jj;
44657     ae_int_t bnd0;
44658     ae_int_t bnd1;
44659     ae_int_t bnd2;
44660     ae_int_t d;
44661 
44662 
44663     n = buf->n;
44664     ivectorsetlengthatleast(&buf->tmp0, n, _state);
44665     cnttoclean = 0;
44666     for(lidx=0; lidx<=buf->lscnt-1; lidx++)
44667     {
44668         if( buf->setq.locationof.ptr.p_int[buf->ls.ptr.p_int[lidx]]<0 )
44669         {
44670             lpi = buf->ls.ptr.p_int[lidx];
44671             cntsuperi = amdordering_knscountkth(&buf->setsuper, lpi, _state);
44672             amdordering_knsdirectaccess(&buf->sete, lpi, &idxbegin, &idxend, _state);
44673             for(jj=idxbegin; jj<=idxend-1; jj++)
44674             {
44675                 e = buf->sete.data.ptr.p_int[jj];
44676                 we = buf->arrwe.ptr.p_int[e];
44677                 if( we<0 )
44678                 {
44679                     we = amdordering_mtxcountcolumnnot(&buf->mtxl, e, &buf->setq, _state);
44680                     buf->tmp0.ptr.p_int[cnttoclean] = e;
44681                     cnttoclean = cnttoclean+1;
44682                 }
44683                 buf->arrwe.ptr.p_int[e] = we-cntsuperi;
44684             }
44685         }
44686     }
44687     amdordering_nsclear(&buf->setqsupercand, _state);
44688     for(lidx=0; lidx<=buf->lscnt-1; lidx++)
44689     {
44690         if( buf->setq.locationof.ptr.p_int[buf->ls.ptr.p_int[lidx]]<0 )
44691         {
44692             lpi = buf->ls.ptr.p_int[lidx];
44693             amdordering_knssubtract1(&buf->seta, lpi, &buf->lp, _state);
44694             amdordering_knssubtract1(&buf->seta, lpi, &buf->setp, _state);
44695             amdordering_knssubtract1(&buf->sete, lpi, &buf->ep, _state);
44696             amdordering_knsaddnewelement(&buf->sete, lpi, p, _state);
44697             if( buf->extendeddebug )
44698             {
44699                 ae_assert(amdordering_knscountnotkth(&buf->seta, lpi, &buf->setsuper, lpi, _state)==amdordering_knscountkth(&buf->seta, lpi, _state), "AMD: integrity check 454F failed", _state);
44700                 ae_assert(amdordering_knscountandkth(&buf->seta, lpi, &buf->setsuper, lpi, _state)==0, "AMD: integrity check kl5nv failed", _state);
44701                 ae_assert(amdordering_nscountandkth(&buf->lp, &buf->setsuper, lpi, _state)==amdordering_knscountkth(&buf->setsuper, lpi, _state), "AMD: integrity check 8463 failed", _state);
44702             }
44703             cntq = amdordering_nscount(&buf->setq, _state);
44704             cntsuperi = amdordering_knscountkth(&buf->setsuper, lpi, _state);
44705             cntainoti = amdordering_knscountkth(&buf->seta, lpi, _state);
44706             if( cntq>0 )
44707             {
44708                 cntainotqi = amdordering_knscountnot(&buf->seta, lpi, &buf->setq, _state);
44709             }
44710             else
44711             {
44712                 cntainotqi = cntainoti;
44713             }
44714             cntlpnoti = amdordering_nscount(&buf->lp, _state)-cntsuperi;
44715             cntlpnotqi = amdordering_nscount(&buf->setrp, _state)-cntsuperi;
44716             cc = 0;
44717             amdordering_knsdirectaccess(&buf->sete, lpi, &idxbegin, &idxend, _state);
44718             for(jj=idxbegin; jj<=idxend-1; jj++)
44719             {
44720                 j = buf->sete.data.ptr.p_int[jj];
44721                 if( j==p )
44722                 {
44723                     continue;
44724                 }
44725                 e = buf->arrwe.ptr.p_int[j];
44726                 if( e<0 )
44727                 {
44728                     if( cntq>0 )
44729                     {
44730                         e = amdordering_mtxcountcolumnnot(&buf->mtxl, j, &buf->setq, _state);
44731                     }
44732                     else
44733                     {
44734                         e = amdordering_mtxcountcolumn(&buf->mtxl, j, _state);
44735                     }
44736                 }
44737                 cc = cc+e;
44738             }
44739             bnd0 = n-k-amdordering_nscount(&buf->setp, _state);
44740             bnd1 = amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)+cntlpnoti;
44741             bnd2 = cntq+cntainotqi+cntlpnotqi+cc;
44742             d = imin3(bnd0, bnd1, bnd2, _state);
44743             amdordering_vtxupdateapproximatedegree(&buf->vertexdegrees, lpi, d, _state);
44744             if( tau>0&&d+cntsuperi>tau )
44745             {
44746                 amdordering_nsaddelement(&buf->setqsupercand, lpi, _state);
44747             }
44748             if( buf->checkexactdegrees )
44749             {
44750                 amdordering_nsclear(&buf->exactdegreetmp0, _state);
44751                 amdordering_knsstartenumeration(&buf->sete, lpi, _state);
44752                 while(amdordering_knsenumerate(&buf->sete, &j, _state))
44753                 {
44754                     amdordering_mtxaddcolumnto(&buf->mtxl, j, &buf->exactdegreetmp0, _state);
44755                 }
44756                 amdordering_vtxupdateexactdegree(&buf->vertexdegrees, lpi, cntainoti+amdordering_nscountnotkth(&buf->exactdegreetmp0, &buf->setsuper, lpi, _state), _state);
44757                 ae_assert((amdordering_knscountkth(&buf->sete, lpi, _state)>2||cntq>0)||amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)==amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state), "AMD: integrity check 7206 failed", _state);
44758                 ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)>=amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state), "AMD: integrity check 8206 failed", _state);
44759             }
44760         }
44761     }
44762     for(j=0; j<=cnttoclean-1; j++)
44763     {
44764         buf->arrwe.ptr.p_int[buf->tmp0.ptr.p_int[j]] = -1;
44765     }
44766 }
44767 
44768 
44769 /*************************************************************************
44770 After mass elimination, but before removal of vertex  P,  we  may  perform
44771 supernode detection. Only variables/supernodes in  Lp  (P  itself  is  NOT
44772 included) can be merged into larger supernodes.
44773 
44774 INPUT PARAMETERS
44775     Buf         -   properly initialized buffer object
44776 
44777 OUTPUT PARAMETERS
44778     Buf         -   following fields of Buf may be modified:
44779                     * Buf.setSuper
44780                     * Buf.setA
44781                     * Buf.setE
44782                     * Buf.IsSupernode
44783                     * ApproxD and ExactD
44784 
44785   -- ALGLIB PROJECT --
44786      Copyright 05.10.2020 by Bochkanov Sergey.
44787 *************************************************************************/
static void amdordering_amddetectsupernodes(amdbuffer* buf,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t cnt;
    ae_int_t lpi;
    ae_int_t lpj;
    ae_int_t nj;
    ae_int_t hashi;


    n = buf->n;
    ivectorsetlengthatleast(&buf->sncandidates, n, _state);

    /* Quick exit: merging needs at least two recently updated vertices in Buf.LS */
    if( buf->lscnt<2 )
    {
        return;
    }

    /*
     * Distribute candidate vertices over N hash buckets. Only vertices NOT
     * present in SetQ (LocationOf[v]<0) are considered. The hash combines
     * the sums of SetA[v] and SetE[v] modulo N, so two vertices with
     * identical adjacency structure always land in the same bucket.
     */
    for(i=0; i<=buf->lscnt-1; i++)
    {
        if( buf->setq.locationof.ptr.p_int[buf->ls.ptr.p_int[i]]<0 )
        {
            lpi = buf->ls.ptr.p_int[i];
            hashi = (amdordering_knssumkth(&buf->seta, lpi, _state)+amdordering_knssumkth(&buf->sete, lpi, _state))%n;
            amdordering_nsaddelement(&buf->nonemptybuckets, hashi, _state);
            amdordering_knsaddnewelement(&buf->hashbuckets, hashi, lpi, _state);
        }
    }

    /*
     * Scan non-empty buckets; only buckets holding two or more vertices can
     * contain a mergeable pair.
     */
    amdordering_nsstartenumeration(&buf->nonemptybuckets, _state);
    while(amdordering_nsenumerate(&buf->nonemptybuckets, &hashi, _state))
    {
        if( amdordering_knscountkth(&buf->hashbuckets, hashi, _state)>=2 )
        {
            /* Flatten bucket contents into SNCandidates[0..Cnt-1] */
            cnt = 0;
            amdordering_knsstartenumeration(&buf->hashbuckets, hashi, _state);
            while(amdordering_knsenumerate(&buf->hashbuckets, &i, _state))
            {
                buf->sncandidates.ptr.p_int[cnt] = i;
                cnt = cnt+1;
            }

            /*
             * Pairwise comparison of candidates. A pair is considered only
             * while both members are still active supernodes (a vertex merged
             * earlier in this loop has IsSupernode reset to False).
             */
            for(i=cnt-1; i>=0; i--)
            {
                for(j=cnt-1; j>=i+1; j--)
                {
                    if( buf->issupernode.ptr.p_bool[buf->sncandidates.ptr.p_int[i]]&&buf->issupernode.ptr.p_bool[buf->sncandidates.ptr.p_int[j]] )
                    {
                        lpi = buf->sncandidates.ptr.p_int[i];
                        lpj = buf->sncandidates.ptr.p_int[j];

                        /*
                         * Build comparable adjacency lists:
                         * AdjI = SetA[i]+SetE[i]+{i,j}, AdjJ = SetA[j]+SetE[j]+{i,j}.
                         * Adding both vertices to both lists makes the two
                         * sets directly comparable with NSEqual().
                         */
                        amdordering_nsclear(&buf->adji, _state);
                        amdordering_nsclear(&buf->adjj, _state);
                        amdordering_nsaddkth(&buf->adji, &buf->seta, lpi, _state);
                        amdordering_nsaddkth(&buf->adjj, &buf->seta, lpj, _state);
                        amdordering_nsaddkth(&buf->adji, &buf->sete, lpi, _state);
                        amdordering_nsaddkth(&buf->adjj, &buf->sete, lpj, _state);
                        amdordering_nsaddelement(&buf->adji, lpi, _state);
                        amdordering_nsaddelement(&buf->adji, lpj, _state);
                        amdordering_nsaddelement(&buf->adjj, lpi, _state);
                        amdordering_nsaddelement(&buf->adjj, lpj, _state);

                        /* Vertices are merged only when adjacency coincides exactly */
                        if( !amdordering_nsequal(&buf->adji, &buf->adjj, _state) )
                        {
                            continue;
                        }
                        if( buf->extendeddebug )
                        {
                            ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)>=1&&(!buf->checkexactdegrees||amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state)>=1), "AMD: integrity check &GBFF1 failed", _state);
                            ae_assert(amdordering_vtxgetapprox(&buf->vertexdegrees, lpj, _state)>=1&&(!buf->checkexactdegrees||amdordering_vtxgetexact(&buf->vertexdegrees, lpj, _state)>=1), "AMD: integrity check &GBFF2 failed", _state);
                            ae_assert(amdordering_knscountandkth(&buf->setsuper, lpi, &buf->setsuper, lpj, _state)==0, "AMD: integrity check &GBFF3 failed", _state);
                        }

                        /*
                         * Merge supernode J into supernode I:
                         * NJ = cardinality of supernode J; its members are
                         * moved into SetSuper[i], then J's per-vertex sets are
                         * reclaimed, J is deactivated and removed from the
                         * degree structure.
                         */
                        nj = amdordering_knscountkth(&buf->setsuper, lpj, _state);
                        amdordering_knsaddkthdistinct(&buf->setsuper, lpi, &buf->setsuper, lpj, _state);
                        amdordering_knsclearkthreclaim(&buf->setsuper, lpj, _state);
                        amdordering_knsclearkthreclaim(&buf->seta, lpj, _state);
                        amdordering_knsclearkthreclaim(&buf->sete, lpj, _state);
                        buf->issupernode.ptr.p_bool[lpj] = ae_false;
                        amdordering_vtxremovevertex(&buf->vertexdegrees, lpj, _state);

                        /*
                         * The merge removes NJ vertices from I's neighborhood,
                         * so I's approximate (and, optionally, exact) degree
                         * decreases by NJ.
                         */
                        amdordering_vtxupdateapproximatedegree(&buf->vertexdegrees, lpi, amdordering_vtxgetapprox(&buf->vertexdegrees, lpi, _state)-nj, _state);
                        if( buf->checkexactdegrees )
                        {
                            amdordering_vtxupdateexactdegree(&buf->vertexdegrees, lpi, amdordering_vtxgetexact(&buf->vertexdegrees, lpi, _state)-nj, _state);
                        }
                    }
                }
            }
        }

        /* Clear the bucket without releasing its storage - buckets are reused on the next call */
        amdordering_knsclearkthnoreclaim(&buf->hashbuckets, hashi, _state);
    }
    amdordering_nsclear(&buf->nonemptybuckets, _state);
}
44877 
44878 
/*
Constructor-style initializer for amdnset: prepares the Items and
LocationOf integer arrays (both empty) for subsequent use.
*/
void _amdnset_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdnset *self = (amdnset*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->items, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->locationof, 0, DT_INT, _state, make_automatic);
}
44886 
44887 
/*
Copy-initializer for amdnset: replicates the scalar fields of *_src into
*_dst and deep-copies both dynamic arrays.
*/
void _amdnset_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdnset *d = (amdnset*)_dst;
    amdnset *s = (amdnset*)_src;
    d->n = s->n;
    d->nstored = s->nstored;
    ae_vector_init_copy(&d->items, &s->items, _state, make_automatic);
    ae_vector_init_copy(&d->locationof, &s->locationof, _state, make_automatic);
    d->iteridx = s->iteridx;
}
44898 
44899 
_amdnset_clear(void * _p)44900 void _amdnset_clear(void* _p)
44901 {
44902     amdnset *p = (amdnset*)_p;
44903     ae_touch_ptr((void*)p);
44904     ae_vector_clear(&p->items);
44905     ae_vector_clear(&p->locationof);
44906 }
44907 
44908 
_amdnset_destroy(void * _p)44909 void _amdnset_destroy(void* _p)
44910 {
44911     amdnset *p = (amdnset*)_p;
44912     ae_touch_ptr((void*)p);
44913     ae_vector_destroy(&p->items);
44914     ae_vector_destroy(&p->locationof);
44915 }
44916 
44917 
/*
Constructor-style initializer for amdknset: prepares all five integer
arrays (flags, row bookkeeping and the shared data pool) as empty vectors.
*/
void _amdknset_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdknset *self = (amdknset*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->flagarray, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vbegin, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vallocated, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vcnt, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->data, 0, DT_INT, _state, make_automatic);
}
44928 
44929 
/*
Copy-initializer for amdknset: replicates scalar fields of *_src into
*_dst and deep-copies every dynamic array.
*/
void _amdknset_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdknset *d = (amdknset*)_dst;
    amdknset *s = (amdknset*)_src;
    d->k = s->k;
    d->n = s->n;
    ae_vector_init_copy(&d->flagarray, &s->flagarray, _state, make_automatic);
    ae_vector_init_copy(&d->vbegin, &s->vbegin, _state, make_automatic);
    ae_vector_init_copy(&d->vallocated, &s->vallocated, _state, make_automatic);
    ae_vector_init_copy(&d->vcnt, &s->vcnt, _state, make_automatic);
    ae_vector_init_copy(&d->data, &s->data, _state, make_automatic);
    d->dataused = s->dataused;
    d->iterrow = s->iterrow;
    d->iteridx = s->iteridx;
}
44945 
44946 
_amdknset_clear(void * _p)44947 void _amdknset_clear(void* _p)
44948 {
44949     amdknset *p = (amdknset*)_p;
44950     ae_touch_ptr((void*)p);
44951     ae_vector_clear(&p->flagarray);
44952     ae_vector_clear(&p->vbegin);
44953     ae_vector_clear(&p->vallocated);
44954     ae_vector_clear(&p->vcnt);
44955     ae_vector_clear(&p->data);
44956 }
44957 
44958 
_amdknset_destroy(void * _p)44959 void _amdknset_destroy(void* _p)
44960 {
44961     amdknset *p = (amdknset*)_p;
44962     ae_touch_ptr((void*)p);
44963     ae_vector_destroy(&p->flagarray);
44964     ae_vector_destroy(&p->vbegin);
44965     ae_vector_destroy(&p->vallocated);
44966     ae_vector_destroy(&p->vcnt);
44967     ae_vector_destroy(&p->data);
44968 }
44969 
44970 
/*
Constructor-style initializer for amdvertexset: prepares degree arrays,
the vertex flag array (boolean) and the doubly-linked-list bookkeeping.
*/
void _amdvertexset_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdvertexset *self = (amdvertexset*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->approxd, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->optionalexactd, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->isvertex, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&self->vbegin, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vprev, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vnext, 0, DT_INT, _state, make_automatic);
}
44982 
44983 
/*
Copy-initializer for amdvertexset: replicates scalar fields of *_src into
*_dst and deep-copies every dynamic array.
*/
void _amdvertexset_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdvertexset *d = (amdvertexset*)_dst;
    amdvertexset *s = (amdvertexset*)_src;
    d->n = s->n;
    d->checkexactdegrees = s->checkexactdegrees;
    d->smallestdegree = s->smallestdegree;
    ae_vector_init_copy(&d->approxd, &s->approxd, _state, make_automatic);
    ae_vector_init_copy(&d->optionalexactd, &s->optionalexactd, _state, make_automatic);
    ae_vector_init_copy(&d->isvertex, &s->isvertex, _state, make_automatic);
    ae_vector_init_copy(&d->vbegin, &s->vbegin, _state, make_automatic);
    ae_vector_init_copy(&d->vprev, &s->vprev, _state, make_automatic);
    ae_vector_init_copy(&d->vnext, &s->vnext, _state, make_automatic);
}
44998 
44999 
_amdvertexset_clear(void * _p)45000 void _amdvertexset_clear(void* _p)
45001 {
45002     amdvertexset *p = (amdvertexset*)_p;
45003     ae_touch_ptr((void*)p);
45004     ae_vector_clear(&p->approxd);
45005     ae_vector_clear(&p->optionalexactd);
45006     ae_vector_clear(&p->isvertex);
45007     ae_vector_clear(&p->vbegin);
45008     ae_vector_clear(&p->vprev);
45009     ae_vector_clear(&p->vnext);
45010 }
45011 
45012 
_amdvertexset_destroy(void * _p)45013 void _amdvertexset_destroy(void* _p)
45014 {
45015     amdvertexset *p = (amdvertexset*)_p;
45016     ae_touch_ptr((void*)p);
45017     ae_vector_destroy(&p->approxd);
45018     ae_vector_destroy(&p->optionalexactd);
45019     ae_vector_destroy(&p->isvertex);
45020     ae_vector_destroy(&p->vbegin);
45021     ae_vector_destroy(&p->vprev);
45022     ae_vector_destroy(&p->vnext);
45023 }
45024 
45025 
/*
Constructor-style initializer for amdllmatrix: prepares the per-column
bookkeeping arrays and the entries pool as empty integer vectors.
*/
void _amdllmatrix_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdllmatrix *self = (amdllmatrix*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->vbegin, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->vcolcnt, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->entries, 0, DT_INT, _state, make_automatic);
}
45034 
45035 
/*
Copy-initializer for amdllmatrix: replicates scalar fields of *_src into
*_dst and deep-copies all three dynamic arrays.
*/
void _amdllmatrix_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdllmatrix *d = (amdllmatrix*)_dst;
    amdllmatrix *s = (amdllmatrix*)_src;
    d->n = s->n;
    ae_vector_init_copy(&d->vbegin, &s->vbegin, _state, make_automatic);
    ae_vector_init_copy(&d->vcolcnt, &s->vcolcnt, _state, make_automatic);
    ae_vector_init_copy(&d->entries, &s->entries, _state, make_automatic);
    d->entriesinitialized = s->entriesinitialized;
}
45046 
45047 
_amdllmatrix_clear(void * _p)45048 void _amdllmatrix_clear(void* _p)
45049 {
45050     amdllmatrix *p = (amdllmatrix*)_p;
45051     ae_touch_ptr((void*)p);
45052     ae_vector_clear(&p->vbegin);
45053     ae_vector_clear(&p->vcolcnt);
45054     ae_vector_clear(&p->entries);
45055 }
45056 
45057 
_amdllmatrix_destroy(void * _p)45058 void _amdllmatrix_destroy(void* _p)
45059 {
45060     amdllmatrix *p = (amdllmatrix*)_p;
45061     ae_touch_ptr((void*)p);
45062     ae_vector_destroy(&p->vbegin);
45063     ae_vector_destroy(&p->vcolcnt);
45064     ae_vector_destroy(&p->entries);
45065 }
45066 
45067 
/*
Constructor-style initializer for amdbuffer: initializes every embedded
structure (flag arrays, supernode/adjacency sets, linked-list matrix,
degree structure, permutations, scratch sets, hash buckets and debug
matrix) in declaration order.
*/
void _amdbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    amdbuffer *self = (amdbuffer*)_p;
    ae_touch_ptr((void*)self);
    ae_vector_init(&self->iseliminated, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&self->issupernode, 0, DT_BOOL, _state, make_automatic);
    _amdknset_init(&self->setsuper, _state, make_automatic);
    _amdknset_init(&self->seta, _state, make_automatic);
    _amdknset_init(&self->sete, _state, make_automatic);
    _amdllmatrix_init(&self->mtxl, _state, make_automatic);
    _amdvertexset_init(&self->vertexdegrees, _state, make_automatic);
    _amdnset_init(&self->setq, _state, make_automatic);
    ae_vector_init(&self->perm, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->invperm, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->columnswaps, 0, DT_INT, _state, make_automatic);
    _amdnset_init(&self->setp, _state, make_automatic);
    _amdnset_init(&self->lp, _state, make_automatic);
    _amdnset_init(&self->setrp, _state, make_automatic);
    _amdnset_init(&self->ep, _state, make_automatic);
    _amdnset_init(&self->adji, _state, make_automatic);
    _amdnset_init(&self->adjj, _state, make_automatic);
    ae_vector_init(&self->ls, 0, DT_INT, _state, make_automatic);
    _amdnset_init(&self->setqsupercand, _state, make_automatic);
    _amdnset_init(&self->exactdegreetmp0, _state, make_automatic);
    _amdknset_init(&self->hashbuckets, _state, make_automatic);
    _amdnset_init(&self->nonemptybuckets, _state, make_automatic);
    ae_vector_init(&self->sncandidates, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->tmp0, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&self->arrwe, 0, DT_INT, _state, make_automatic);
    ae_matrix_init(&self->dbga, 0, 0, DT_REAL, _state, make_automatic);
}
45099 
45100 
/*
Copy-initializer for amdbuffer: replicates every scalar field of *_src
into *_dst and deep-copies every embedded structure, preserving the
original field order.
*/
void _amdbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    amdbuffer *d = (amdbuffer*)_dst;
    amdbuffer *s = (amdbuffer*)_src;
    d->n = s->n;
    d->extendeddebug = s->extendeddebug;
    d->checkexactdegrees = s->checkexactdegrees;
    ae_vector_init_copy(&d->iseliminated, &s->iseliminated, _state, make_automatic);
    ae_vector_init_copy(&d->issupernode, &s->issupernode, _state, make_automatic);
    _amdknset_init_copy(&d->setsuper, &s->setsuper, _state, make_automatic);
    _amdknset_init_copy(&d->seta, &s->seta, _state, make_automatic);
    _amdknset_init_copy(&d->sete, &s->sete, _state, make_automatic);
    _amdllmatrix_init_copy(&d->mtxl, &s->mtxl, _state, make_automatic);
    _amdvertexset_init_copy(&d->vertexdegrees, &s->vertexdegrees, _state, make_automatic);
    _amdnset_init_copy(&d->setq, &s->setq, _state, make_automatic);
    ae_vector_init_copy(&d->perm, &s->perm, _state, make_automatic);
    ae_vector_init_copy(&d->invperm, &s->invperm, _state, make_automatic);
    ae_vector_init_copy(&d->columnswaps, &s->columnswaps, _state, make_automatic);
    _amdnset_init_copy(&d->setp, &s->setp, _state, make_automatic);
    _amdnset_init_copy(&d->lp, &s->lp, _state, make_automatic);
    _amdnset_init_copy(&d->setrp, &s->setrp, _state, make_automatic);
    _amdnset_init_copy(&d->ep, &s->ep, _state, make_automatic);
    _amdnset_init_copy(&d->adji, &s->adji, _state, make_automatic);
    _amdnset_init_copy(&d->adjj, &s->adjj, _state, make_automatic);
    ae_vector_init_copy(&d->ls, &s->ls, _state, make_automatic);
    d->lscnt = s->lscnt;
    _amdnset_init_copy(&d->setqsupercand, &s->setqsupercand, _state, make_automatic);
    _amdnset_init_copy(&d->exactdegreetmp0, &s->exactdegreetmp0, _state, make_automatic);
    _amdknset_init_copy(&d->hashbuckets, &s->hashbuckets, _state, make_automatic);
    _amdnset_init_copy(&d->nonemptybuckets, &s->nonemptybuckets, _state, make_automatic);
    ae_vector_init_copy(&d->sncandidates, &s->sncandidates, _state, make_automatic);
    ae_vector_init_copy(&d->tmp0, &s->tmp0, _state, make_automatic);
    ae_vector_init_copy(&d->arrwe, &s->arrwe, _state, make_automatic);
    ae_matrix_init_copy(&d->dbga, &s->dbga, _state, make_automatic);
}
45136 
45137 
_amdbuffer_clear(void * _p)45138 void _amdbuffer_clear(void* _p)
45139 {
45140     amdbuffer *p = (amdbuffer*)_p;
45141     ae_touch_ptr((void*)p);
45142     ae_vector_clear(&p->iseliminated);
45143     ae_vector_clear(&p->issupernode);
45144     _amdknset_clear(&p->setsuper);
45145     _amdknset_clear(&p->seta);
45146     _amdknset_clear(&p->sete);
45147     _amdllmatrix_clear(&p->mtxl);
45148     _amdvertexset_clear(&p->vertexdegrees);
45149     _amdnset_clear(&p->setq);
45150     ae_vector_clear(&p->perm);
45151     ae_vector_clear(&p->invperm);
45152     ae_vector_clear(&p->columnswaps);
45153     _amdnset_clear(&p->setp);
45154     _amdnset_clear(&p->lp);
45155     _amdnset_clear(&p->setrp);
45156     _amdnset_clear(&p->ep);
45157     _amdnset_clear(&p->adji);
45158     _amdnset_clear(&p->adjj);
45159     ae_vector_clear(&p->ls);
45160     _amdnset_clear(&p->setqsupercand);
45161     _amdnset_clear(&p->exactdegreetmp0);
45162     _amdknset_clear(&p->hashbuckets);
45163     _amdnset_clear(&p->nonemptybuckets);
45164     ae_vector_clear(&p->sncandidates);
45165     ae_vector_clear(&p->tmp0);
45166     ae_vector_clear(&p->arrwe);
45167     ae_matrix_clear(&p->dbga);
45168 }
45169 
45170 
_amdbuffer_destroy(void * _p)45171 void _amdbuffer_destroy(void* _p)
45172 {
45173     amdbuffer *p = (amdbuffer*)_p;
45174     ae_touch_ptr((void*)p);
45175     ae_vector_destroy(&p->iseliminated);
45176     ae_vector_destroy(&p->issupernode);
45177     _amdknset_destroy(&p->setsuper);
45178     _amdknset_destroy(&p->seta);
45179     _amdknset_destroy(&p->sete);
45180     _amdllmatrix_destroy(&p->mtxl);
45181     _amdvertexset_destroy(&p->vertexdegrees);
45182     _amdnset_destroy(&p->setq);
45183     ae_vector_destroy(&p->perm);
45184     ae_vector_destroy(&p->invperm);
45185     ae_vector_destroy(&p->columnswaps);
45186     _amdnset_destroy(&p->setp);
45187     _amdnset_destroy(&p->lp);
45188     _amdnset_destroy(&p->setrp);
45189     _amdnset_destroy(&p->ep);
45190     _amdnset_destroy(&p->adji);
45191     _amdnset_destroy(&p->adjj);
45192     ae_vector_destroy(&p->ls);
45193     _amdnset_destroy(&p->setqsupercand);
45194     _amdnset_destroy(&p->exactdegreetmp0);
45195     _amdknset_destroy(&p->hashbuckets);
45196     _amdnset_destroy(&p->nonemptybuckets);
45197     ae_vector_destroy(&p->sncandidates);
45198     ae_vector_destroy(&p->tmp0);
45199     ae_vector_destroy(&p->arrwe);
45200     ae_matrix_destroy(&p->dbga);
45201 }
45202 
45203 
45204 #endif
45205 #if defined(AE_COMPILE_SPCHOL) || !defined(AE_PARTIAL_BUILD)
45206 
45207 
45208 /*************************************************************************
45209 Informational function, useful for debugging
45210 *************************************************************************/
spsymmgetmaxfastkernel(ae_state * _state)45211 ae_int_t spsymmgetmaxfastkernel(ae_state *_state)
45212 {
45213     ae_int_t result;
45214 
45215 
45216     result = spchol_maxfastkernel;
45217     return result;
45218 }
45219 
45220 
45221 /*************************************************************************
45222 Symbolic phase of Cholesky decomposition.
45223 
45224 Performs preliminary analysis of Cholesky/LDLT factorization.  The  latter
45225 is computed with strictly diagonal D (no Bunch-Kauffman pivoting).
45226 
45227 The analysis object produced by this function will be used later to  guide
45228 actual decomposition.
45229 
45230 Depending on settings specified during factorization, may produce  vanilla
45231 Cholesky or L*D*LT  decomposition  (with  strictly  diagonal  D),  without
45232 permutation or with permutation P (being either  topological  ordering  or
45233 sparsity preserving ordering).
45234 
45235 Thus, A is represented as either L*LT or L*D*LT or P*L*LT*PT or P*L*D*LT*PT.
45236 
45237 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
45238       matrices. However, numerical stability is guaranteed ONLY for a class
45239       of quasi-definite matrices.
45240 
45241 INPUT PARAMETERS:
45242     A           -   sparse square matrix in CRS format, with LOWER triangle
45243                     being used to store the matrix.
45244     FactType    -   factorization type:
45245                     * 0 for traditional Cholesky
45246                     * 1 for LDLT decomposition with strictly diagonal D
45247     PermType    -   permutation type:
45248                     *-3 for debug improved AMD (a sequence of decreasing
45249                         tail sizes is generated, ~logN in total, even if
45250                         ordering can be done with just one round of AMD).
45251                         This ordering is used to test correctness of
45252                         multiple AMD rounds.
45253                     *-2 for column count ordering (NOT RECOMMENDED!)
45254                     *-1 for absence of permutation
45255                     * 0 for best permutation available
45256                     * 1 for supernodal ordering (improves locality and
45257                       performance, but does NOT change fill-in pattern)
45258                     * 2 for supernodal AMD ordering (improves fill-in)
45259                     * 3 for  improved  AMD  (approximate  minimum  degree)
45260                         ordering with better  handling  of  matrices  with
45261                         dense rows/columns
45262     Analysis    -   can be uninitialized instance, or previous analysis
45263                     results. Previously allocated memory is reused as much
45264                     as possible.
45265     Buf         -   buffer; may be completely uninitialized, or one remained
45266                     from previous calls (including ones with completely
45267                     different matrices). Previously allocated temporary
45268                     space will be reused as much as possible.
45269 
45270 OUTPUT PARAMETERS:
45271     Analysis    -   symbolic analysis of the matrix structure  which  will
45272                     be used later to guide  numerical  factorization.  The
45273                     numerical values are stored internally in the structure,
45274                     but you have to  run  factorization  phase  explicitly
                    with SPSymmFactorize(). You  can  also  reload  another
45276                     matrix with same sparsity pattern with  SPSymmReload()
45277                     or rewrite its diagonal with SPSymmReloadDiagonal().
45278 
45279 This function fails if and only if the matrix A is symbolically degenerate
45280 i.e. has diagonal element which is exactly zero. In  such  case  False  is
45281 returned.
45282 
45283 NOTE: defining 'SCHOLESKY' trace tag will activate tracing.
45284       defining 'SCHOLESKY.SS' trace tag will activate detailed tracing  of
45285       the supernodal structure.
45286 
45287 NOTE: defining 'DEBUG.SLOW' trace tag will  activate  extra-slow  (roughly
45288       N^3 ops) integrity checks, in addition to cheap O(1) ones.
45289 
45290   -- ALGLIB routine --
45291      20.09.2020
45292      Bochkanov Sergey
45293 *************************************************************************/
spsymmanalyze(sparsematrix * a,ae_int_t facttype,ae_int_t permtype,spcholanalysis * analysis,ae_state * _state)45294 ae_bool spsymmanalyze(sparsematrix* a,
45295      ae_int_t facttype,
45296      ae_int_t permtype,
45297      spcholanalysis* analysis,
45298      ae_state *_state)
45299 {
45300     ae_int_t n;
45301     ae_int_t i;
45302     ae_int_t j;
45303     ae_int_t jj;
45304     ae_int_t k;
45305     ae_int_t residual;
45306     ae_int_t tail;
45307     ae_bool permready;
45308     ae_bool result;
45309 
45310 
45311     ae_assert(sparseiscrs(a, _state), "SPSymmAnalyze: A is not stored in CRS format", _state);
45312     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SPSymmAnalyze: non-square A", _state);
45313     ae_assert(facttype==0||facttype==1, "SPSymmAnalyze: unexpected FactType", _state);
45314     ae_assert((((((permtype==0||permtype==1)||permtype==2)||permtype==3)||permtype==-1)||permtype==-2)||permtype==-3, "SPSymmAnalyze: unexpected PermType", _state);
45315     if( permtype==0 )
45316     {
45317         permtype = 3;
45318     }
45319     result = ae_true;
45320     n = sparsegetnrows(a, _state);
45321     analysis->tasktype = 0;
45322     analysis->n = n;
45323     analysis->unitd = facttype==0;
45324     analysis->permtype = permtype;
45325     analysis->extendeddebug = ae_is_trace_enabled("DEBUG.SLOW")&&n<=100;
45326     analysis->dotrace = ae_is_trace_enabled("SCHOLESKY");
45327     analysis->dotracesupernodalstructure = analysis->dotrace&&ae_is_trace_enabled("SCHOLESKY.SS");
45328     analysis->istopologicalordering = permtype==-1||permtype==1;
45329     analysis->applypermutationtooutput = permtype==-1;
45330     analysis->modtype = 0;
45331     analysis->modparam0 = 0.0;
45332     analysis->modparam1 = 0.0;
45333     analysis->modparam2 = 0.0;
45334     analysis->modparam3 = 0.0;
45335 
45336     /*
45337      * Allocate temporaries
45338      */
45339     ivectorsetlengthatleast(&analysis->tmpparent, n+1, _state);
45340     ivectorsetlengthatleast(&analysis->tmp0, n+1, _state);
45341     ivectorsetlengthatleast(&analysis->tmp1, n+1, _state);
45342     ivectorsetlengthatleast(&analysis->tmp2, n+1, _state);
45343     ivectorsetlengthatleast(&analysis->tmp3, n+1, _state);
45344     ivectorsetlengthatleast(&analysis->tmp4, n+1, _state);
45345     bvectorsetlengthatleast(&analysis->flagarray, n+1, _state);
45346 
45347     /*
45348      * Initial trace message
45349      */
45350     if( analysis->dotrace )
45351     {
45352         ae_trace("\n\n");
45353         ae_trace("////////////////////////////////////////////////////////////////////////////////////////////////////\n");
45354         ae_trace("//  SPARSE CHOLESKY ANALYSIS STARTED                                                              //\n");
45355         ae_trace("////////////////////////////////////////////////////////////////////////////////////////////////////\n");
45356 
45357         /*
45358          * Analyze row statistics
45359          */
45360         ae_trace("=== ANALYZING ROW STATISTICS =======================================================================\n");
45361         ae_trace("row size is:\n");
45362         isetv(n, 1, &analysis->tmp0, _state);
45363         for(i=0; i<=n-1; i++)
45364         {
45365             for(jj=a->ridx.ptr.p_int[i]; jj<=a->didx.ptr.p_int[i]-1; jj++)
45366             {
45367                 j = a->idx.ptr.p_int[jj];
45368                 analysis->tmp0.ptr.p_int[i] = analysis->tmp0.ptr.p_int[i]+1;
45369                 analysis->tmp0.ptr.p_int[j] = analysis->tmp0.ptr.p_int[j]+1;
45370             }
45371         }
45372         k = 1;
45373         while(k<=n)
45374         {
45375             j = 0;
45376             for(i=0; i<=n-1; i++)
45377             {
45378                 if( analysis->tmp0.ptr.p_int[i]>=k&&analysis->tmp0.ptr.p_int[i]<2*k )
45379                 {
45380                     j = j+1;
45381                 }
45382             }
45383             ae_trace("* [%6d..%6d) elements: %6d rows\n",
45384                 (int)(k),
45385                 (int)(2*k),
45386                 (int)(j));
45387             k = k*2;
45388         }
45389     }
45390 
45391     /*
45392      * Initial integrity check - diagonal MUST be symbolically nonzero
45393      */
45394     for(i=0; i<=n-1; i++)
45395     {
45396         if( a->didx.ptr.p_int[i]==a->uidx.ptr.p_int[i] )
45397         {
45398             if( analysis->dotrace )
45399             {
45400                 ae_trace("> the matrix diagonal is symbolically zero, stopping");
45401             }
45402             result = ae_false;
45403             return result;
45404         }
45405     }
45406 
45407     /*
45408      * What type of permutation do we have?
45409      */
45410     if( analysis->istopologicalordering )
45411     {
45412         ae_assert(permtype==-1||permtype==1, "SPSymmAnalyze: integrity check failed (ihebd)", _state);
45413 
45414         /*
45415          * Build topologically ordered elimination tree
45416          */
45417         spchol_buildorderedetree(a, n, &analysis->tmpparent, &analysis->superperm, &analysis->invsuperperm, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->flagarray, _state);
45418         ivectorsetlengthatleast(&analysis->fillinperm, n, _state);
45419         ivectorsetlengthatleast(&analysis->invfillinperm, n, _state);
45420         ivectorsetlengthatleast(&analysis->effectiveperm, n, _state);
45421         ivectorsetlengthatleast(&analysis->inveffectiveperm, n, _state);
45422         for(i=0; i<=n-1; i++)
45423         {
45424             analysis->fillinperm.ptr.p_int[i] = i;
45425             analysis->invfillinperm.ptr.p_int[i] = i;
45426             analysis->effectiveperm.ptr.p_int[i] = analysis->superperm.ptr.p_int[i];
45427             analysis->inveffectiveperm.ptr.p_int[i] = analysis->invsuperperm.ptr.p_int[i];
45428         }
45429 
45430         /*
45431          * Reorder input matrix
45432          */
45433         spchol_topologicalpermutation(a, &analysis->superperm, &analysis->tmpat, _state);
45434 
45435         /*
45436          * Analyze etree, build supernodal structure
45437          */
45438         spchol_createsupernodalstructure(&analysis->tmpat, &analysis->tmpparent, n, analysis, &analysis->node2supernode, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->tmp3, &analysis->tmp4, &analysis->flagarray, _state);
45439 
45440         /*
45441          * Having fully initialized supernodal structure, analyze dependencies
45442          */
45443         spchol_analyzesupernodaldependencies(analysis, a, &analysis->node2supernode, n, &analysis->tmp0, &analysis->tmp1, &analysis->flagarray, _state);
45444 
45445         /*
45446          * Load matrix into the supernodal storage
45447          */
45448         spchol_loadmatrix(analysis, &analysis->tmpat, _state);
45449     }
45450     else
45451     {
45452 
45453         /*
45454          * Generate fill-in reducing permutation
45455          */
45456         permready = ae_false;
45457         if( permtype==-2 )
45458         {
45459             spchol_generatedbgpermutation(a, n, &analysis->fillinperm, &analysis->invfillinperm, _state);
45460             permready = ae_true;
45461         }
45462         if( permtype==2 )
45463         {
45464             generateamdpermutation(a, n, &analysis->fillinperm, &analysis->invfillinperm, &analysis->amdtmp, _state);
45465             permready = ae_true;
45466         }
45467         if( permtype==3||permtype==-3 )
45468         {
45469 
45470             /*
45471              * Perform iterative AMD, with nearly-dense columns being postponed to be handled later.
45472              *
45473              * The current (residual) matrix A is divided into two parts: head, with its columns being
45474              * properly ordered, and tail, with its columns being reordered at the next iteration.
45475              *
45476              * After each partial AMD we compute sparsity pattern of the tail, set it as the new residual
45477              * and repeat iteration.
45478              */
45479             residual = n;
45480             iallocv(n, &analysis->fillinperm, _state);
45481             iallocv(n, &analysis->invfillinperm, _state);
45482             for(i=0; i<=n-1; i++)
45483             {
45484                 analysis->fillinperm.ptr.p_int[i] = i;
45485                 analysis->invfillinperm.ptr.p_int[i] = i;
45486             }
45487             sparsecopybuf(a, &analysis->tmpa, _state);
45488             if( analysis->dotrace )
45489             {
45490                 ae_trace("> multiround AMD, tail=%0d\n",
45491                     (int)(residual));
45492             }
45493             while(residual>0)
45494             {
45495 
45496                 /*
45497                  * Generate partial fill-in reducing permutation (leading Residual-Tail columns are
45498                  * properly ordered, the rest is unordered).
45499                  */
45500                 tail = residual-generateamdpermutationx(&analysis->tmpa, residual, &analysis->tmpperm, &analysis->invtmpperm, 1, &analysis->amdtmp, _state);
45501                 if( permtype==-3 )
45502                 {
45503 
45504                     /*
45505                      * Special debug ordering in order to test correctness of multiple AMD rounds
45506                      */
45507                     tail = ae_maxint(tail, residual/2, _state);
45508                 }
45509                 ae_assert(tail<residual, "SPSymmAnalyze: integrity check failed (Tail=Residual)", _state);
45510 
45511                 /*
45512                  * Apply permutation TmpPerm[] to the tail of the permutation FillInPerm[]
45513                  */
45514                 for(i=0; i<=residual-1; i++)
45515                 {
45516                     analysis->fillinperm.ptr.p_int[analysis->invfillinperm.ptr.p_int[n-residual+analysis->invtmpperm.ptr.p_int[i]]] = n-residual+i;
45517                 }
45518                 for(i=0; i<=n-1; i++)
45519                 {
45520                     analysis->invfillinperm.ptr.p_int[analysis->fillinperm.ptr.p_int[i]] = i;
45521                 }
45522 
45523                 /*
45524                  * Compute partial Cholesky of the trailing submatrix (after applying rank-K update to the
45525                  * trailing submatrix but before Cholesky-factorizing it).
45526                  */
45527                 if( tail>0 )
45528                 {
45529                     sparsesymmpermtblbuf(&analysis->tmpa, ae_false, &analysis->tmpperm, &analysis->tmpa2, _state);
45530                     spchol_partialcholeskypattern(&analysis->tmpa2, residual-tail, tail, &analysis->tmpa, &analysis->tmpparent, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->flagarray, &analysis->tmpbottomt, &analysis->tmpupdatet, &analysis->tmpupdate, &analysis->tmpnewtailt, _state);
45531                     if( analysis->extendeddebug )
45532                     {
45533                         spchol_slowdebugchecks(a, &analysis->fillinperm, n, tail, &analysis->tmpa, _state);
45534                     }
45535                 }
45536                 residual = tail;
45537                 if( analysis->dotrace )
45538                 {
45539                     ae_trace("> multiround AMD, tail=%0d\n",
45540                         (int)(residual));
45541                 }
45542             }
45543             permready = ae_true;
45544         }
45545         ae_assert(permready, "SPSymmAnalyze: integrity check failed (pp4td)", _state);
45546 
45547         /*
45548          * Apply permutation to the matrix, perform analysis on the initially reordered matrix
45549          * (we may need one more reordering, now topological one, due to supernodal analysis).
45550          * Build topologically ordered elimination tree
45551          */
45552         sparsesymmpermtblbuf(a, ae_false, &analysis->fillinperm, &analysis->tmpa, _state);
45553         spchol_buildorderedetree(&analysis->tmpa, n, &analysis->tmpparent, &analysis->superperm, &analysis->invsuperperm, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->flagarray, _state);
45554         ivectorsetlengthatleast(&analysis->effectiveperm, n, _state);
45555         ivectorsetlengthatleast(&analysis->inveffectiveperm, n, _state);
45556         for(i=0; i<=n-1; i++)
45557         {
45558             analysis->effectiveperm.ptr.p_int[i] = analysis->superperm.ptr.p_int[analysis->fillinperm.ptr.p_int[i]];
45559             analysis->inveffectiveperm.ptr.p_int[analysis->effectiveperm.ptr.p_int[i]] = i;
45560         }
45561 
45562         /*
45563          * Reorder input matrix
45564          */
45565         spchol_topologicalpermutation(&analysis->tmpa, &analysis->superperm, &analysis->tmpat, _state);
45566 
45567         /*
45568          * Analyze etree, build supernodal structure
45569          */
45570         spchol_createsupernodalstructure(&analysis->tmpat, &analysis->tmpparent, n, analysis, &analysis->node2supernode, &analysis->tmp0, &analysis->tmp1, &analysis->tmp2, &analysis->tmp3, &analysis->tmp4, &analysis->flagarray, _state);
45571 
45572         /*
45573          * Having fully initialized supernodal structure, analyze dependencies
45574          */
45575         spchol_analyzesupernodaldependencies(analysis, &analysis->tmpa, &analysis->node2supernode, n, &analysis->tmp0, &analysis->tmp1, &analysis->flagarray, _state);
45576 
45577         /*
45578          * Load matrix into the supernodal storage
45579          */
45580         spchol_loadmatrix(analysis, &analysis->tmpat, _state);
45581     }
45582     return result;
45583 }
45584 
45585 
45586 /*************************************************************************
45587 Sets modified Cholesky type
45588 
45589 INPUT PARAMETERS:
45590     Analysis    -   symbolic analysis of the matrix structure
45591     ModStrategy -   modification type:
45592                     * 0 for traditional Cholesky/LDLT (Cholesky fails when
45593                       encounters nonpositive pivot, LDLT fails  when  zero
45594                       pivot   is  encountered,  no  stability  checks  for
45595                       overflows/underflows)
45596                     * 1 for modified Cholesky with additional checks:
45597                       * pivots less than ModParam0 are increased; (similar
45598                         procedure with proper generalization is applied to
45599                         LDLT)
45600                       * if,  at  some  moment,  sum  of absolute values of
45601                         elements in column  J  will  become  greater  than
45602                         ModParam1, Cholesky/LDLT will treat it as  failure
45603                         and will stop immediately
45604                       * if ModParam0 is zero, no pivot modification is applied
45605                       * if ModParam1 is zero, no overflow check is performed
45606     P0, P1, P2, P3 - modification parameters #0, #1, #2 and #3.
45607                     Params #2 and #3 are ignored in current version.
45608 
45609 OUTPUT PARAMETERS:
45610     Analysis    -   symbolic analysis of the matrix structure, new strategy
45611                     (results will be seen with next SPSymmFactorize() call)
45612 
45613   -- ALGLIB routine --
45614      20.09.2020
45615      Bochkanov Sergey
45616 *************************************************************************/
void spsymmsetmodificationstrategy(spcholanalysis* analysis,
     ae_int_t modstrategy,
     double p0,
     double p1,
     double p2,
     double p3,
     ae_state *_state)
{

    /*
     * Validate inputs before modifying the analysis object: strategy must be
     * one of the supported values, all parameters must be finite, and P0
     * must additionally be nonnegative.
     */
    ae_assert(modstrategy==0||modstrategy==1, "SPSymmSetModificationStrategy: unexpected ModStrategy", _state);
    ae_assert(ae_isfinite(p0, _state)&&ae_fp_greater_eq(p0,(double)(0)), "SPSymmSetModificationStrategy: bad P0", _state);
    ae_assert(ae_isfinite(p1, _state), "SPSymmSetModificationStrategy: bad P1", _state);
    ae_assert(ae_isfinite(p2, _state), "SPSymmSetModificationStrategy: bad P2", _state);
    ae_assert(ae_isfinite(p3, _state), "SPSymmSetModificationStrategy: bad P3", _state);

    /*
     * Store the strategy and its parameters; they will be picked up by
     * subsequent factorization calls.
     */
    analysis->modtype   = modstrategy;
    analysis->modparam0 = p0;
    analysis->modparam1 = p1;
    analysis->modparam2 = p2;
    analysis->modparam3 = p3;
}
45638 
45639 
45640 /*************************************************************************
45641 Updates symmetric  matrix  internally  stored  in  previously  initialized
45642 Analysis object.
45643 
45644 You can use this function to perform  multiple  factorizations  with  same
45645 sparsity patterns: perform symbolic analysis  once  with  SPSymmAnalyze(),
45646 then update internal matrix with SPSymmReload() and call SPSymmFactorize().
45647 
45648 INPUT PARAMETERS:
45649     Analysis    -   symbolic analysis of the matrix structure
45650     A           -   sparse square matrix in CRS format with LOWER triangle
45651                     being used to store the matrix. The matrix  MUST  have
45652                     sparsity   pattern   exactly   same  as  one  used  to
45653                     initialize the Analysis object.
45654                     The algorithm will fail in  an  unpredictable  way  if
45655                     something different was passed.
45656 
45657 OUTPUT PARAMETERS:
45658     Analysis    -   symbolic analysis of the matrix structure  which  will
45659                     be used later to guide  numerical  factorization.  The
45660                     numerical values are stored internally in the structure,
45661                     but you have to  run  factorization  phase  explicitly
45662                     with SPSymmFactorize(). You  can  also  reload another
45663                     matrix with same sparsity pattern with SPSymmReload().
45664 
45665   -- ALGLIB routine --
45666      20.09.2020
45667      Bochkanov Sergey
45668 *************************************************************************/
void spsymmreload(spcholanalysis* analysis,
     sparsematrix* a,
     ae_state *_state)
{

    /*
     * A must be a square CRS matrix with the same sparsity pattern as the
     * one used during the analysis phase.
     */
    ae_assert(sparseiscrs(a, _state), "SPSymmReload: A is not stored in CRS format", _state);
    ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SPSymmReload: non-square A", _state);
    if( !analysis->istopologicalordering )
    {

        /*
         * Non-topological permutation: perform a generic symmetric
         * permutation first, then transpose the result into TmpAT.
         */
        sparsesymmpermtblbuf(a, ae_false, &analysis->effectiveperm, &analysis->tmpa, _state);
        sparsecopytransposecrsbuf(&analysis->tmpa, &analysis->tmpat, _state);
    }
    else
    {

        /*
         * Topological (fill-in preserving) ordering: A can be moved into
         * TmpAT directly with a joint permute+transpose.
         */
        spchol_topologicalpermutation(a, &analysis->effectiveperm, &analysis->tmpat, _state);
    }

    /*
     * Both paths leave the reordered matrix in TmpAT; load it into the
     * supernodal storage.
     */
    spchol_loadmatrix(analysis, &analysis->tmpat, _state);
}
45699 
45700 
45701 /*************************************************************************
45702 Updates  diagonal  of  the  symmetric  matrix  internally  stored  in  the
45703 previously initialized Analysis object.
45704 
45705 When only diagonal of the  matrix  has  changed,  this  function  is  more
45706 efficient than SPSymmReload() that has to perform  costly  permutation  of
45707 the entire matrix.
45708 
45709 You can use this function to perform  multiple  factorizations  with  same
45710 off-diagonal elements: perform symbolic analysis once with SPSymmAnalyze(),
45711 then update diagonal with SPSymmReloadDiagonal() and call SPSymmFactorize().
45712 
45713 INPUT PARAMETERS:
45714     Analysis    -   symbolic analysis of the matrix structure
45715     D           -   array[N], diagonal factor
45716 
45717 OUTPUT PARAMETERS:
45718     Analysis    -   symbolic analysis of the matrix structure  which  will
45719                     be used later to guide  numerical  factorization.  The
45720                     numerical values are stored internally in the structure,
45721                     but you have to  run  factorization  phase  explicitly
45722                     with SPSymmFactorize(). You  can  also  reload another
45723                     matrix with same sparsity pattern with SPSymmReload().
45724 
45725   -- ALGLIB routine --
45726      05.09.2021
45727      Bochkanov Sergey
45728 *************************************************************************/
void spsymmreloaddiagonal(spcholanalysis* analysis,
     /* Real    */ ae_vector* d,
     ae_state *_state)
{
    ae_int_t snode;
    ae_int_t c0;
    ae_int_t c1;
    ae_int_t offs;
    ae_int_t stride;
    ae_int_t k;


    ae_assert(d->cnt>=analysis->n, "SPSymmReloadDiagonal: length(D)<N", _state);

    /*
     * For every supernode overwrite the diagonal entries of its dense block
     * in InputStorage with permuted values of D. Column C0+K of the
     * supernode has its diagonal element at Offs+K*Stride+K, and the source
     * value is fetched through the inverse effective permutation.
     */
    for(snode=0; snode<=analysis->nsuper-1; snode++)
    {
        c0 = analysis->supercolrange.ptr.p_int[snode];
        c1 = analysis->supercolrange.ptr.p_int[snode+1];
        offs = analysis->rowoffsets.ptr.p_int[snode];
        stride = analysis->rowstrides.ptr.p_int[snode];
        for(k=0; k<=c1-c0-1; k++)
        {
            analysis->inputstorage.ptr.p_double[offs+k*stride+k] = d->ptr.p_double[analysis->inveffectiveperm.ptr.p_int[c0+k]];
        }
    }
}
45754 
45755 
45756 /*************************************************************************
45757 Sparse Cholesky factorization of symmetric matrix stored  in  CRS  format,
45758 using precomputed analysis of the sparsity pattern stored  in the Analysis
45759 object and specific numeric values that  are  presently  loaded  into  the
45760 Analysis.
45761 
45762 The factorization can be retrieved  with  SPSymmExtract().  Alternatively,
45763 one can perform some operations without offloading  the  matrix  (somewhat
45764 faster due to utilization of  SIMD-friendly  supernodal  data structures),
45765 most importantly - linear system solution with SPSymmSolve().
45766 
45767 Depending on settings specified during factorization, may produce  vanilla
45768 Cholesky or L*D*LT  decomposition  (with  strictly  diagonal  D),  without
45769 permutation or with permutation P (being either  topological  ordering  or
45770 sparsity preserving ordering).
45771 
45772 Thus, A is represented as either L*LT or L*D*LT or P*L*LT*PT or P*L*D*LT*PT.
45773 
45774 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
45775       matrices. However, numerical stability is guaranteed ONLY for a class
45776       of quasi-definite matrices.
45777 
45778 INPUT PARAMETERS:
45779     Analysis    -   prior  analysis  performed on some sparse matrix, with
45780                     matrix being stored in Analysis.
45781 
45782 OUTPUT PARAMETERS:
45783     Analysis    -   contains factorization results
45784 
45785 The function returns True  when  factorization  resulted  in nondegenerate
45786 matrix. False is returned when factorization fails (Cholesky factorization
45787 of indefinite matrix) or LDLT factorization has exactly zero  elements  at
45788 the diagonal.
45789 
45790   -- ALGLIB routine --
45791      20.09.2020
45792      Bochkanov Sergey
45793 *************************************************************************/
ae_bool spsymmfactorize(spcholanalysis* analysis, ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t ii;
    ae_int_t n;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t offss;
    ae_int_t blocksize;
    ae_int_t sidx;
    ae_int_t uidx;
    ae_bool result;


    ae_assert(analysis->tasktype==0, "SPCholFactorize: Analysis type does not match current task", _state);
    result = ae_true;
    n = analysis->n;

    /*
     * Prepare structures:
     * * WrkRows[] store pointers to beginnings of the offdiagonal supernode row ranges;
     *   at the beginning of the work WrkRows[]=0, but as we advance from the column
     *   range [0,A) to [A,B), to [B,C) and so on, we advance WrkRows[] in order to
     *   quickly skip parts that are less than A, less than B, less than C and so on.
     * * DiagD is zero-filled here and receives the diagonal factor during factorization.
     * * OutputStorage starts as a copy of InputStorage (the loaded matrix) and is
     *   factorized in-place below.
     */
    ivectorsetlengthatleast(&analysis->raw2smap, n, _state);
    ivectorsetlengthatleast(&analysis->tmp0, n+1, _state);
    bsetallocv(n, ae_false, &analysis->flagarray, _state);
    isetallocv(analysis->nsuper, 0, &analysis->wrkrows, _state);
    rsetallocv(n, 0.0, &analysis->diagd, _state);
    rcopyallocv(analysis->rowoffsets.ptr.p_int[analysis->nsuper], &analysis->inputstorage, &analysis->outputstorage, _state);

    /*
     * Now we can run actual supernodal Cholesky: supernodes are processed
     * left to right; each one is first updated by previously factorized
     * supernodes, then factorized itself.
     */
    for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
    {
        /* Column range [Cols0,Cols1) and dense-storage offset of supernode SIdx */
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        offss = analysis->rowoffsets.ptr.p_int[sidx];

        /*
         * Prepare mapping of raw (range 0...N-1) indexes into internal (range 0...BlockSize+OffdiagSize-1) ones
         */
        if( analysis->extendeddebug )
        {
            /* In debug mode poison the map with -1 so stale entries are caught */
            isetv(n, -1, &analysis->raw2smap, _state);
        }
        for(i=cols0; i<=cols1-1; i++)
        {
            /* Columns inside the supernode map to local indexes 0..BlockSize-1 */
            analysis->raw2smap.ptr.p_int[i] = i-cols0;
        }
        for(k=analysis->superrowridx.ptr.p_int[sidx]; k<=analysis->superrowridx.ptr.p_int[sidx+1]-1; k++)
        {
            /* Offdiagonal rows map to local indexes BlockSize..BlockSize+OffdiagSize-1 */
            analysis->raw2smap.ptr.p_int[analysis->superrowidx.ptr.p_int[k]] = blocksize+(k-analysis->superrowridx.ptr.p_int[sidx]);
        }

        /*
         * Update current supernode with nonzeros from the current row.
         * LAdjPlus[LAdjPlusR[SIdx]..LAdjPlusR[SIdx+1]) lists updating
         * supernodes; WrkRows[UIdx] is advanced by the update routine so the
         * already-consumed part of UIdx's row range is skipped next time.
         */
        for(ii=analysis->ladjplusr.ptr.p_int[sidx]; ii<=analysis->ladjplusr.ptr.p_int[sidx+1]-1; ii++)
        {
            uidx = analysis->ladjplus.ptr.p_int[ii];
            analysis->wrkrows.ptr.p_int[uidx] = spchol_updatesupernode(analysis, sidx, cols0, cols1, offss, &analysis->raw2smap, uidx, analysis->wrkrows.ptr.p_int[uidx], &analysis->diagd, analysis->supercolrange.ptr.p_int[uidx], _state);
        }

        /*
         * Factorize current supernode; a failure (e.g. nonpositive pivot in
         * plain Cholesky) aborts the entire factorization.
         */
        if( !spchol_factorizesupernode(analysis, sidx, _state) )
        {
            result = ae_false;
            return result;
        }
    }
    return result;
}
45873 
45874 
45875 /*************************************************************************
45876 Extracts result of the last Cholesky/LDLT factorization performed  on  the
45877 Analysis object.
45878 
45879 Following calls will  result in the undefined behavior:
45880 * calling for Analysis that was not factorized with SPSymmFactorize()
45881 * calling after SPSymmFactorize() returned False
45882 
45883 INPUT PARAMETERS:
45884     Analysis    -   prior factorization performed on some sparse matrix
45885     D, P        -   possibly preallocated buffers
45886 
45887 OUTPUT PARAMETERS:
45888     A           -   Cholesky/LDLT decomposition  of A stored in CRS format
45889                     in LOWER triangle.
45890     D           -   array[N], diagonal factor. If no diagonal  factor  was
45891                     required during analysis  phase,  still  returned  but
45892                     filled with units.
45893     P           -   array[N], pivots. Permutation matrix P is a product of
45894                     P(0)*P(1)*...*P(N-1), where P(i) is a  permutation  of
45895                     row/col I and P[I] (with P[I]>=I).
45896                     If no permutation was requested during analysis phase,
45897                     still returned but filled with unit elements.
45898 
45899   -- ALGLIB routine --
45900      20.09.2020
45901      Bochkanov Sergey
45902 *************************************************************************/
void spsymmextract(spcholanalysis* analysis,
     sparsematrix* a,
     /* Real    */ ae_vector* d,
     /* Integer */ ae_vector* p,
     ae_state *_state)
{


    /*
     * Thin wrapper: converts the supernodal factorization stored in the
     * Analysis object (RowOffsets/RowStrides/OutputStorage/DiagD) into a
     * CRS matrix A, diagonal factor D and pivot array P.
     * Tmp0 is passed as a scratch buffer.
     */
    spchol_extractmatrix(analysis, &analysis->rowoffsets, &analysis->rowstrides, &analysis->outputstorage, &analysis->diagd, analysis->n, a, d, p, &analysis->tmp0, _state);
}
45913 
45914 
45915 /*************************************************************************
45916 Solve linear system A*x=b, using internally stored  factorization  of  the
45917 matrix A.
45918 
45919 Works faster than extracting the matrix and solving with SparseTRSV()  due
45920 to SIMD-friendly supernodal data structures being used.
45921 
45922 INPUT PARAMETERS:
45923     Analysis    -   prior factorization performed on some sparse matrix
45924     B           -   array[N], right-hand side
45925 
45926 OUTPUT PARAMETERS:
45927     B           -   overwritten by X
45928 
45929   -- ALGLIB routine --
45930      08.09.2021
45931      Bochkanov Sergey
45932 *************************************************************************/
void spsymmsolve(spcholanalysis* analysis,
     /* Real    */ ae_vector* b,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;
    ae_int_t simdwidth;
    ae_int_t baseoffs;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t offss;
    ae_int_t sstride;
    ae_int_t sidx;
    ae_int_t blocksize;
    ae_int_t rbase;
    ae_int_t offdiagsize;


    n = analysis->n;
    simdwidth = spchol_spsymmgetmaxsimd(_state);
    rsetallocv(n, 0.0, &analysis->tmpx, _state);

    /*
     * Handle left-hand side permutation, convert data to internal SIMD-friendly format:
     * entry I of the permuted RHS is stored at SimdBuf[I*SimdWidth], with the
     * remaining SimdWidth-1 slots zeroed (the RHS is kept as an unevaluated
     * sum of SimdWidth numbers).
     */
    rsetallocv(n*simdwidth, 0.0, &analysis->simdbuf, _state);
    for(i=0; i<=n-1; i++)
    {
        analysis->simdbuf.ptr.p_double[i*simdwidth] = b->ptr.p_double[analysis->inveffectiveperm.ptr.p_int[i]];
    }

    /*
     * Solve for L*tmp_x=rhs.
     *
     * The RHS (original and temporary updates) is stored in the SIMD-friendly SIMDBuf which
     * stores RHS as unevaluated sum of SIMDWidth numbers (this format allows easy updates
     * with SIMD intrinsics), the result is written into TmpX (traditional contiguous storage).
     */
    for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
    {
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        offss = analysis->rowoffsets.ptr.p_int[sidx];
        sstride = analysis->rowstrides.ptr.p_int[sidx];
        rbase = analysis->superrowridx.ptr.p_int[sidx];
        offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-rbase;

        /*
         * Solve for variables in the supernode: for each column I, collapse
         * the SimdWidth partial sums of the RHS, subtract contributions of
         * the already-solved columns of this supernode, and divide by the
         * diagonal element L[I][I].
         */
        for(i=cols0; i<=cols1-1; i++)
        {
            baseoffs = offss+(i-cols0)*sstride+(-cols0);
            v = (double)(0);
            for(j=0; j<=simdwidth-1; j++)
            {
                v = v+analysis->simdbuf.ptr.p_double[i*simdwidth+j];
            }
            for(j=cols0; j<=i-1; j++)
            {
                v = v-analysis->outputstorage.ptr.p_double[baseoffs+j]*analysis->tmpx.ptr.p_double[j];
            }
            analysis->tmpx.ptr.p_double[i] = v/analysis->outputstorage.ptr.p_double[baseoffs+i];
        }

        /*
         * Propagate update to other variables: subtract the offdiagonal part
         * L[offdiag rows][supernode cols]*x[supernode] from the pending RHS in SimdBuf.
         */
        spchol_propagatefwd(&analysis->tmpx, cols0, blocksize, &analysis->superrowidx, rbase, offdiagsize, &analysis->outputstorage, offss, sstride, &analysis->simdbuf, simdwidth, _state);
    }

    /*
     * Solve for D*tmp_x=rhs.
     * Zero elements of D are skipped (the corresponding solution component is
     * forced to zero) instead of producing a division by zero.
     * NOTE(review): presumably DiagD is filled with units for plain Cholesky
     * (see SPSymmExtract() docs), so this branch handles degenerate LDLT
     * pivots -- confirm against the factorization code.
     */
    for(i=0; i<=n-1; i++)
    {
        if( analysis->diagd.ptr.p_double[i]!=0.0 )
        {
            analysis->tmpx.ptr.p_double[i] = analysis->tmpx.ptr.p_double[i]/analysis->diagd.ptr.p_double[i];
        }
        else
        {
            analysis->tmpx.ptr.p_double[i] = 0.0;
        }
    }

    /*
     * Solve for L'*tmp_x=rhs by processing supernodes right to left
     * (backward substitution).
     */
    for(sidx=analysis->nsuper-1; sidx>=0; sidx--)
    {
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        offss = analysis->rowoffsets.ptr.p_int[sidx];
        sstride = analysis->rowstrides.ptr.p_int[sidx];
        rbase = analysis->superrowridx.ptr.p_int[sidx];
        offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-rbase;

        /*
         * Subtract already computed variables: contributions of the
         * offdiagonal rows (rows below the supernode, already solved).
         */
        for(k=0; k<=offdiagsize-1; k++)
        {
            baseoffs = offss+(k+blocksize)*sstride;
            v = analysis->tmpx.ptr.p_double[analysis->superrowidx.ptr.p_int[rbase+k]];
            for(j=0; j<=blocksize-1; j++)
            {
                analysis->tmpx.ptr.p_double[cols0+j] = analysis->tmpx.ptr.p_double[cols0+j]-analysis->outputstorage.ptr.p_double[baseoffs+j]*v;
            }
        }

        /*
         * Solve for variables in the supernode, bottom-up within its dense
         * triangular block.
         */
        for(i=blocksize-1; i>=0; i--)
        {
            baseoffs = offss+i*sstride;
            v = analysis->tmpx.ptr.p_double[cols0+i]/analysis->outputstorage.ptr.p_double[baseoffs+i];
            for(j=0; j<=i-1; j++)
            {
                analysis->tmpx.ptr.p_double[cols0+j] = analysis->tmpx.ptr.p_double[cols0+j]-v*analysis->outputstorage.ptr.p_double[baseoffs+j];
            }
            analysis->tmpx.ptr.p_double[cols0+i] = v;
        }
    }

    /*
     * Handle right-hand side permutation: write the permuted solution back
     * into B.
     */
    for(i=0; i<=n-1; i++)
    {
        b->ptr.p_double[i] = analysis->tmpx.ptr.p_double[analysis->effectiveperm.ptr.p_int[i]];
    }
}
46073 
46074 
46075 /*************************************************************************
46076 Compares diag(L*L') with that of the original A and returns  two  metrics:
46077 * SumSq - sum of squares of diag(A)
46078 * ErrSq - sum of squared errors, i.e. Frobenius norm of diag(L*L')-diag(A)
46079 
46080 These metrics can be used to check accuracy of the factorization.
46081 
46082 INPUT PARAMETERS:
46083     Analysis    -   prior factorization performed on some sparse matrix
46084 
46085 OUTPUT PARAMETERS:
46086     SumSq, ErrSq-   diagonal magnitude and absolute diagonal error
46087 
46088   -- ALGLIB routine --
46089      08.09.2021
46090      Bochkanov Sergey
46091 *************************************************************************/
spsymmdiagerr(spcholanalysis * analysis,double * sumsq,double * errsq,ae_state * _state)46092 void spsymmdiagerr(spcholanalysis* analysis,
46093      double* sumsq,
46094      double* errsq,
46095      ae_state *_state)
46096 {
46097     ae_int_t n;
46098     double v;
46099     double vv;
46100     ae_int_t simdwidth;
46101     ae_int_t baseoffs;
46102     ae_int_t cols0;
46103     ae_int_t cols1;
46104     ae_int_t offss;
46105     ae_int_t sstride;
46106     ae_int_t sidx;
46107     ae_int_t blocksize;
46108     ae_int_t rbase;
46109     ae_int_t offdiagsize;
46110     ae_int_t i;
46111     ae_int_t j;
46112     ae_int_t k;
46113 
46114     *sumsq = 0;
46115     *errsq = 0;
46116 
46117     n = analysis->n;
46118     simdwidth = 1;
46119 
46120     /*
46121      * Scan L, compute diag(L*L')
46122      */
46123     rsetallocv(simdwidth*n, 0.0, &analysis->simdbuf, _state);
46124     for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
46125     {
46126         cols0 = analysis->supercolrange.ptr.p_int[sidx];
46127         cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
46128         blocksize = cols1-cols0;
46129         offss = analysis->rowoffsets.ptr.p_int[sidx];
46130         sstride = analysis->rowstrides.ptr.p_int[sidx];
46131         rbase = analysis->superrowridx.ptr.p_int[sidx];
46132         offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-rbase;
46133 
46134         /*
46135          * Handle triangular diagonal block
46136          */
46137         for(i=cols0; i<=cols1-1; i++)
46138         {
46139             baseoffs = offss+(i-cols0)*sstride+(-cols0);
46140             v = (double)(0);
46141             for(j=0; j<=simdwidth-1; j++)
46142             {
46143                 v = v+analysis->simdbuf.ptr.p_double[i*simdwidth+j];
46144             }
46145             for(j=cols0; j<=i; j++)
46146             {
46147                 vv = analysis->outputstorage.ptr.p_double[baseoffs+j];
46148                 v = v+vv*vv*analysis->diagd.ptr.p_double[j];
46149             }
46150             *sumsq = *sumsq+ae_sqr(analysis->inputstorage.ptr.p_double[baseoffs+i], _state);
46151             *errsq = *errsq+ae_sqr(analysis->inputstorage.ptr.p_double[baseoffs+i]-v, _state);
46152         }
46153 
46154         /*
46155          * Accumulate entries below triangular diagonal block
46156          */
46157         for(k=0; k<=offdiagsize-1; k++)
46158         {
46159             i = analysis->superrowidx.ptr.p_int[rbase+k];
46160             baseoffs = offss+(k+blocksize)*sstride;
46161             v = analysis->simdbuf.ptr.p_double[i*simdwidth];
46162             for(j=0; j<=blocksize-1; j++)
46163             {
46164                 vv = analysis->outputstorage.ptr.p_double[baseoffs+j];
46165                 v = v+vv*vv*analysis->diagd.ptr.p_double[cols0+j];
46166             }
46167             analysis->simdbuf.ptr.p_double[i*simdwidth] = v;
46168         }
46169     }
46170 }
46171 
46172 
46173 #ifdef ALGLIB_NO_FAST_KERNELS
46174 /*************************************************************************
46175 Informational function, useful for debugging
46176 *************************************************************************/
spchol_spsymmgetmaxsimd(ae_state * _state)46177 static ae_int_t spchol_spsymmgetmaxsimd(ae_state *_state)
46178 {
46179     ae_int_t result;
46180 
46181 
46182     result = 1;
46183     return result;
46184 }
46185 #endif
46186 
46187 
46188 #ifdef ALGLIB_NO_FAST_KERNELS
46189 /*************************************************************************
46190 Solving linear system: propagating computed supernode.
46191 
46192 Propagates computed supernode to the rest of the RHS  using  SIMD-friendly
46193 RHS storage format.
46194 
46195 INPUT PARAMETERS:
46196 
46197 OUTPUT PARAMETERS:
46198 
46199   -- ALGLIB routine --
46200      08.09.2021
46201      Bochkanov Sergey
46202 *************************************************************************/
spchol_propagatefwd(ae_vector * x,ae_int_t cols0,ae_int_t blocksize,ae_vector * superrowidx,ae_int_t rbase,ae_int_t offdiagsize,ae_vector * rowstorage,ae_int_t offss,ae_int_t sstride,ae_vector * simdbuf,ae_int_t simdwidth,ae_state * _state)46203 static void spchol_propagatefwd(/* Real    */ ae_vector* x,
46204      ae_int_t cols0,
46205      ae_int_t blocksize,
46206      /* Integer */ ae_vector* superrowidx,
46207      ae_int_t rbase,
46208      ae_int_t offdiagsize,
46209      /* Real    */ ae_vector* rowstorage,
46210      ae_int_t offss,
46211      ae_int_t sstride,
46212      /* Real    */ ae_vector* simdbuf,
46213      ae_int_t simdwidth,
46214      ae_state *_state)
46215 {
46216     ae_int_t i;
46217     ae_int_t j;
46218     ae_int_t k;
46219     ae_int_t baseoffs;
46220     double v;
46221 
46222 
46223     for(k=0; k<=offdiagsize-1; k++)
46224     {
46225         i = superrowidx->ptr.p_int[rbase+k];
46226         baseoffs = offss+(k+blocksize)*sstride;
46227         v = simdbuf->ptr.p_double[i*simdwidth];
46228         for(j=0; j<=blocksize-1; j++)
46229         {
46230             v = v-rowstorage->ptr.p_double[baseoffs+j]*x->ptr.p_double[cols0+j];
46231         }
46232         simdbuf->ptr.p_double[i*simdwidth] = v;
46233     }
46234 }
46235 #endif
46236 
46237 
46238 /*************************************************************************
46239 This function generates a test reordering used for debug purposes only
46240 
46241 INPUT PARAMETERS
46242     A           -   lower triangular sparse matrix in CRS format
46243     N           -   problem size
46244 
46245 OUTPUT PARAMETERS
46246     Perm        -   array[N], maps original indexes I to permuted indexes
46247     InvPerm     -   array[N], maps permuted indexes I to original indexes
46248 
46249   -- ALGLIB PROJECT --
46250      Copyright 05.10.2020 by Bochkanov Sergey.
46251 *************************************************************************/
spchol_generatedbgpermutation(sparsematrix * a,ae_int_t n,ae_vector * perm,ae_vector * invperm,ae_state * _state)46252 static void spchol_generatedbgpermutation(sparsematrix* a,
46253      ae_int_t n,
46254      /* Integer */ ae_vector* perm,
46255      /* Integer */ ae_vector* invperm,
46256      ae_state *_state)
46257 {
46258     ae_frame _frame_block;
46259     ae_int_t i;
46260     ae_int_t j;
46261     ae_int_t j0;
46262     ae_int_t j1;
46263     ae_int_t jj;
46264     ae_vector d;
46265     ae_vector tmpr;
46266     ae_vector tmpperm;
46267 
46268     ae_frame_make(_state, &_frame_block);
46269     memset(&d, 0, sizeof(d));
46270     memset(&tmpr, 0, sizeof(tmpr));
46271     memset(&tmpperm, 0, sizeof(tmpperm));
46272     ae_vector_init(&d, 0, DT_REAL, _state, ae_true);
46273     ae_vector_init(&tmpr, 0, DT_REAL, _state, ae_true);
46274     ae_vector_init(&tmpperm, 0, DT_INT, _state, ae_true);
46275 
46276 
46277     /*
46278      * Initialize D by vertex degrees
46279      */
46280     rsetallocv(n, (double)(0), &d, _state);
46281     for(i=0; i<=n-1; i++)
46282     {
46283         j0 = a->ridx.ptr.p_int[i];
46284         j1 = a->didx.ptr.p_int[i]-1;
46285         d.ptr.p_double[i] = (double)(j1-j0+1);
46286         for(jj=j0; jj<=j1; jj++)
46287         {
46288             j = a->idx.ptr.p_int[jj];
46289             d.ptr.p_double[j] = d.ptr.p_double[j]+1;
46290         }
46291     }
46292 
46293     /*
46294      * Prepare permutation that orders vertices by degrees
46295      */
46296     iallocv(n, invperm, _state);
46297     for(i=0; i<=n-1; i++)
46298     {
46299         invperm->ptr.p_int[i] = i;
46300     }
46301     tagsortfasti(&d, invperm, &tmpr, &tmpperm, n, _state);
46302     iallocv(n, perm, _state);
46303     for(i=0; i<=n-1; i++)
46304     {
46305         perm->ptr.p_int[invperm->ptr.p_int[i]] = i;
46306     }
46307     ae_frame_leave(_state);
46308 }
46309 
46310 
46311 /*************************************************************************
46312 This function builds elimination tree in the original column order
46313 
46314 INPUT PARAMETERS
46315     A           -   lower triangular sparse matrix in CRS format
46316     N           -   problem size
46317     Parent,
46318     tAbove      -   preallocated temporary array, length at least N+1, no
46319                     meaningful output is provided in these variables
46320 
46321 OUTPUT PARAMETERS
46322     Parent      -   array[N], Parent[I] contains index of parent of I-th
46323                     column. -1 is used to denote column with no parents.
46324 
46325   -- ALGLIB PROJECT --
46326      Copyright 15.08.2021 by Bochkanov Sergey.
46327 *************************************************************************/
static void spchol_buildunorderedetree(sparsematrix* a,
     ae_int_t n,
     /* Integer */ ae_vector* parent,
     /* Integer */ ae_vector* tabove,
     ae_state *_state)
{
    ae_int_t r;
    ae_int_t abover;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t jj;


    ae_assert(parent->cnt>=n+1, "BuildUnorderedETree: input buffer Parent is too short", _state);
    ae_assert(tabove->cnt>=n+1, "BuildUnorderedETree: input buffer tAbove is too short", _state);

    /*
     * Build elimination tree using Liu's algorithm with path compression
     *
     * Internally the value N is used as a sentinel meaning "no parent /
     * no ancestor assigned yet"; tAbove[R] stores a compressed ancestor
     * link: some column index known to lie on the path from R towards the
     * root (tAbove[R] >= Parent[R]), which shortcuts repeated climbs.
     */
    for(j=0; j<=n-1; j++)
    {
        /* Start column J with no parent and no ancestor link */
        parent->ptr.p_int[j] = n;
        tabove->ptr.p_int[j] = n;
        /* Strictly sub-diagonal entries of row J, i.e. columns R<J with A[J,R]!=0 */
        j0 = a->ridx.ptr.p_int[j];
        j1 = a->didx.ptr.p_int[j]-1;
        for(jj=j0; jj<=j1; jj++)
        {
            r = a->idx.ptr.p_int[jj];
            abover = tabove->ptr.p_int[r];
            /*
             * Climb from R towards the root, rewriting every ancestor link on
             * the way to point at J (path compression; J is the largest
             * column seen so far, so this only tightens the links).
             */
            while(abover<j)
            {
                k = abover;
                tabove->ptr.p_int[r] = j;
                r = k;
                abover = tabove->ptr.p_int[r];
            }
            /*
             * Reached a root (no ancestor assigned): J becomes its parent.
             */
            if( abover==n )
            {
                tabove->ptr.p_int[r] = j;
                parent->ptr.p_int[r] = j;
            }
        }
    }

    /*
     * Convert to external format: replace the internal "no parent"
     * sentinel N by -1.
     */
    for(i=0; i<=n-1; i++)
    {
        if( parent->ptr.p_int[i]==n )
        {
            parent->ptr.p_int[i] = -1;
        }
    }
}
46386 
46387 
46388 /*************************************************************************
46389 This function analyzes  elimination  tree  stored  using  'parent-of-node'
46390 format and converts it to the 'children-of-node' format.
46391 
46392 INPUT PARAMETERS
46393     Parent      -   array[N], supernodal etree
46394     N           -   problem size
46395     ChildrenR,
46396     ChildrenI,
46397     tTmp0       -   preallocated arrays, length at least N+1
46398 
46399 OUTPUT PARAMETERS
46400     ChildrenR   -   array[N+1], children range (see below)
46401     ChildrenI   -   array[N+1], children of K-th node are stored  in  the
46402                     elements ChildrenI[ChildrenR[K]...ChildrenR[K+1]-1]
46403 
46404   -- ALGLIB PROJECT --
46405      Copyright 05.10.2020 by Bochkanov Sergey.
46406 *************************************************************************/
spchol_fromparenttochildren(ae_vector * parent,ae_int_t n,ae_vector * childrenr,ae_vector * childreni,ae_vector * ttmp0,ae_state * _state)46407 static void spchol_fromparenttochildren(/* Integer */ ae_vector* parent,
46408      ae_int_t n,
46409      /* Integer */ ae_vector* childrenr,
46410      /* Integer */ ae_vector* childreni,
46411      /* Integer */ ae_vector* ttmp0,
46412      ae_state *_state)
46413 {
46414     ae_int_t i;
46415     ae_int_t k;
46416     ae_int_t nodeidx;
46417 
46418 
46419     ae_assert(ttmp0->cnt>=n+1, "FromParentToChildren: input buffer tTmp0 is too short", _state);
46420     ae_assert(childrenr->cnt>=n+1, "FromParentToChildren: input buffer ChildrenR is too short", _state);
46421     ae_assert(childreni->cnt>=n+1, "FromParentToChildren: input buffer ChildrenI is too short", _state);
46422 
46423     /*
46424      * Convert etree from per-column parent array to per-column children list
46425      */
46426     isetv(n, 0, ttmp0, _state);
46427     for(i=0; i<=n-1; i++)
46428     {
46429         nodeidx = parent->ptr.p_int[i];
46430         if( nodeidx>=0 )
46431         {
46432             ttmp0->ptr.p_int[nodeidx] = ttmp0->ptr.p_int[nodeidx]+1;
46433         }
46434     }
46435     childrenr->ptr.p_int[0] = 0;
46436     for(i=0; i<=n-1; i++)
46437     {
46438         childrenr->ptr.p_int[i+1] = childrenr->ptr.p_int[i]+ttmp0->ptr.p_int[i];
46439     }
46440     isetv(n, 0, ttmp0, _state);
46441     for(i=0; i<=n-1; i++)
46442     {
46443         k = parent->ptr.p_int[i];
46444         if( k>=0 )
46445         {
46446             childreni->ptr.p_int[childrenr->ptr.p_int[k]+ttmp0->ptr.p_int[k]] = i;
46447             ttmp0->ptr.p_int[k] = ttmp0->ptr.p_int[k]+1;
46448         }
46449     }
46450 }
46451 
46452 
46453 /*************************************************************************
46454 This function builds elimination tree and reorders  it  according  to  the
46455 topological post-ordering.
46456 
46457 INPUT PARAMETERS
46458     A           -   lower triangular sparse matrix in CRS format
46459     N           -   problem size
46460 
46461     tRawParentOfRawNode,
46462     tRawParentOfReorderedNode,
46463     tTmp,
46464     tFlagArray  -   preallocated temporary arrays, length at least N+1, no
46465                     meaningful output is provided in these variables
46466 
46467 OUTPUT PARAMETERS
46468     Parent      -   array[N], Parent[I] contains index of parent of I-th
46469                     column (after topological reordering). -1 is used to
46470                     denote column with no parents.
46471     SupernodalPermutation
46472                 -   array[N], maps original indexes I to permuted indexes
46473     InvSupernodalPermutation
46474                 -   array[N], maps permuted indexes I to original indexes
46475 
46476   -- ALGLIB PROJECT --
46477      Copyright 05.10.2020 by Bochkanov Sergey.
46478 *************************************************************************/
static void spchol_buildorderedetree(sparsematrix* a,
     ae_int_t n,
     /* Integer */ ae_vector* parent,
     /* Integer */ ae_vector* supernodalpermutation,
     /* Integer */ ae_vector* invsupernodalpermutation,
     /* Integer */ ae_vector* trawparentofrawnode,
     /* Integer */ ae_vector* trawparentofreorderednode,
     /* Integer */ ae_vector* ttmp,
     /* Boolean */ ae_vector* tflagarray,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t k;
    ae_int_t sidx;
    ae_int_t unprocessedchildrencnt;


    ae_assert(trawparentofrawnode->cnt>=n+1, "BuildOrderedETree: input buffer tRawParentOfRawNode is too short", _state);
    ae_assert(ttmp->cnt>=n+1, "BuildOrderedETree: input buffer tTmp is too short", _state);
    ae_assert(trawparentofreorderednode->cnt>=n+1, "BuildOrderedETree: input buffer tRawParentOfReorderedNode is too short", _state);
    ae_assert(tflagarray->cnt>=n+1, "BuildOrderedETree: input buffer tFlagArray is too short", _state);

    /*
     * Avoid spurious compiler warnings
     */
    unprocessedchildrencnt = 0;

    /*
     * Build elimination tree with original column order
     */
    spchol_buildunorderedetree(a, n, trawparentofrawnode, ttmp, _state);

    /*
     * Compute topological ordering of the elimination tree, produce:
     * * direct and inverse permutations
     * * reordered etree stored in Parent[]
     *
     * After the counting loop below, tTmp[K] holds the number of
     * not-yet-emitted children of raw node K; tFlagArray[I]=True means
     * raw node I has not been assigned a position yet.
     */
    isetallocv(n, -1, invsupernodalpermutation, _state);
    isetallocv(n, -1, supernodalpermutation, _state);
    isetallocv(n, -1, parent, _state);
    isetv(n, -1, trawparentofreorderednode, _state);
    isetv(n, 0, ttmp, _state);
    for(i=0; i<=n-1; i++)
    {
        k = trawparentofrawnode->ptr.p_int[i];
        if( k>=0 )
        {
            ttmp->ptr.p_int[k] = ttmp->ptr.p_int[k]+1;
        }
    }
    bsetv(n, ae_true, tflagarray, _state);
    sidx = 0;
    for(i=0; i<=n-1; i++)
    {
        if( tflagarray->ptr.p_bool[i] )
        {

            /*
             * Move column I to position SIdx, decrease unprocessed children count
             */
            supernodalpermutation->ptr.p_int[i] = sidx;
            invsupernodalpermutation->ptr.p_int[sidx] = i;
            tflagarray->ptr.p_bool[i] = ae_false;
            k = trawparentofrawnode->ptr.p_int[i];
            trawparentofreorderednode->ptr.p_int[sidx] = k;
            if( k>=0 )
            {
                unprocessedchildrencnt = ttmp->ptr.p_int[k]-1;
                ttmp->ptr.p_int[k] = unprocessedchildrencnt;
            }
            sidx = sidx+1;

            /*
             * Add parents (as long as parent has no unprocessed children).
             * This climb guarantees a post-ordering: a parent is emitted
             * immediately after its last child, i.e. every node is placed
             * after all of its descendants.
             */
            while(k>=0&&unprocessedchildrencnt==0)
            {
                supernodalpermutation->ptr.p_int[k] = sidx;
                invsupernodalpermutation->ptr.p_int[sidx] = k;
                tflagarray->ptr.p_bool[k] = ae_false;
                k = trawparentofrawnode->ptr.p_int[k];
                trawparentofreorderednode->ptr.p_int[sidx] = k;
                if( k>=0 )
                {
                    unprocessedchildrencnt = ttmp->ptr.p_int[k]-1;
                    ttmp->ptr.p_int[k] = unprocessedchildrencnt;
                }
                sidx = sidx+1;
            }
        }
    }
    /*
     * Translate the etree into the reordered index space:
     * tRawParentOfReorderedNode still holds raw parent indexes, map
     * them through the permutation to fill Parent[].
     */
    for(i=0; i<=n-1; i++)
    {
        k = trawparentofreorderednode->ptr.p_int[i];
        if( k>=0 )
        {
            parent->ptr.p_int[i] = supernodalpermutation->ptr.p_int[k];
        }
    }
}
46579 
46580 
46581 /*************************************************************************
46582 This function analyzes postordered elimination tree and creates supernodal
46583 structure in Analysis object.
46584 
46585 INPUT PARAMETERS
46586     AT          -   upper triangular CRS matrix, transpose and  reordering
46587                     of the original input matrix A
46588     Parent      -   array[N], supernodal etree
46589     N           -   problem size
46590 
46591     tChildrenR,
46592     tChildrenI,
46593     tParentNodeOfSupernode,
46594     tNode2Supernode,
46595     tTmp0,
46596     tFlagArray  -   temporary arrays, length at least N+1, simply provide
46597                     preallocated place.
46598 
46599 OUTPUT PARAMETERS
46600     Analysis    -   following fields are initialized:
46601                     * Analysis.NSuper
46602                     * Analysis.SuperColRange
46603                     * Analysis.SuperRowRIdx
46604                     * Analysis.SuperRowIdx
46605                     * Analysis.ParentSupernode
46606                     * Analysis.OutRowCounts
46607                     other fields are ignored and not changed.
46608     Node2Supernode- array[N] that maps node indexes to supernode indexes
46609 
46610   -- ALGLIB PROJECT --
46611      Copyright 05.10.2020 by Bochkanov Sergey.
46612 *************************************************************************/
static void spchol_createsupernodalstructure(sparsematrix* at,
     /* Integer */ ae_vector* parent,
     ae_int_t n,
     spcholanalysis* analysis,
     /* Integer */ ae_vector* node2supernode,
     /* Integer */ ae_vector* tchildrenr,
     /* Integer */ ae_vector* tchildreni,
     /* Integer */ ae_vector* tparentnodeofsupernode,
     /* Integer */ ae_vector* tfakenonzeros,
     /* Integer */ ae_vector* ttmp0,
     /* Boolean */ ae_vector* tflagarray,
     ae_state *_state)
{
    ae_int_t nsuper;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t sidx;
    ae_int_t i0;
    ae_int_t ii;
    ae_int_t columnidx;
    ae_int_t nodeidx;
    ae_int_t rfirst;
    ae_int_t rlast;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t blocksize;
    ae_bool createsupernode;
    ae_int_t colcount;
    ae_int_t offdiagcnt;
    ae_int_t childcolcount;
    ae_int_t childoffdiagcnt;
    ae_int_t fakezerosinnewsupernode;
    double mergeinefficiency;
    ae_bool hastheonlychild;


    ae_assert(ttmp0->cnt>=n+1, "CreateSupernodalStructure: input buffer tTmp0 is too short", _state);
    ae_assert(tchildrenr->cnt>=n+1, "CreateSupernodalStructure: input buffer ChildrenR is too short", _state);
    ae_assert(tchildreni->cnt>=n+1, "CreateSupernodalStructure: input buffer ChildrenI is too short", _state);
    ae_assert(tparentnodeofsupernode->cnt>=n+1, "CreateSupernodalStructure: input buffer tParentNodeOfSupernode is too short", _state);
    ae_assert(tfakenonzeros->cnt>=n+1, "CreateSupernodalStructure: input buffer tFakeNonzeros is too short", _state);
    ae_assert(tflagarray->cnt>=n+1, "CreateSupernodalStructure: input buffer tFlagArray is too short", _state);

    /*
     * Trace
     */
    if( analysis->dotracesupernodalstructure )
    {
        ae_trace("=== GENERATING SUPERNODAL STRUCTURE ================================================================\n");
    }

    /*
     * Convert etree from per-column parent array to per-column children list
     */
    spchol_fromparenttochildren(parent, n, tchildrenr, tchildreni, ttmp0, _state);

    /*
     * Analyze supernodal structure:
     * * determine children count for each node
     * * combine chains of children into supernodes
     * * generate direct and inverse supernodal (topological) permutations
     * * generate column structure of supernodes (after supernodal permutation)
     *
     * Note: tParentNodeOfSupernode is filled with the sentinel N+1
     * ("undefined") before the main loop; tFakeNonzeros[S] counts
     * explicitly stored zeros introduced into supernode S by merging.
     */
    isetallocv(n, -1, node2supernode, _state);
    ivectorsetlengthatleast(&analysis->supercolrange, n+1, _state);
    ivectorsetlengthatleast(&analysis->superrowridx, n+1, _state);
    isetv(n, n+1, tparentnodeofsupernode, _state);
    bsetv(n, ae_true, tflagarray, _state);
    nsuper = 0;
    analysis->supercolrange.ptr.p_int[0] = 0;
    analysis->superrowridx.ptr.p_int[0] = 0;
    while(analysis->supercolrange.ptr.p_int[nsuper]<n)
    {
        columnidx = analysis->supercolrange.ptr.p_int[nsuper];

        /*
         * Compute nonzero pattern of the column, create temporary standalone node
         * for possible supernodal merge. Newly created node has just one column
         * and no fake nonzeros.
         */
        rfirst = analysis->superrowridx.ptr.p_int[nsuper];
        rlast = spchol_computenonzeropattern(at, columnidx, n, &analysis->superrowridx, &analysis->superrowidx, nsuper, tchildrenr, tchildreni, node2supernode, tflagarray, ttmp0, _state);
        analysis->supercolrange.ptr.p_int[nsuper+1] = columnidx+1;
        analysis->superrowridx.ptr.p_int[nsuper+1] = rlast;
        node2supernode->ptr.p_int[columnidx] = nsuper;
        tparentnodeofsupernode->ptr.p_int[nsuper] = parent->ptr.p_int[columnidx];
        tfakenonzeros->ptr.p_int[nsuper] = 0;
        offdiagcnt = rlast-rfirst;
        colcount = 1;
        nsuper = nsuper+1;
        if( analysis->dotracesupernodalstructure )
        {
            ae_trace("> incoming column %0d\n",
                (int)(columnidx));
            ae_trace("offdiagnnz = %0d\n",
                (int)(rlast-rfirst));
            ae_trace("children   = [ ");
            for(i=tchildrenr->ptr.p_int[columnidx]; i<=tchildrenr->ptr.p_int[columnidx+1]-1; i++)
            {
                ae_trace("S%0d ",
                    (int)(node2supernode->ptr.p_int[tchildreni->ptr.p_int[i]]));
            }
            ae_trace("]\n");
        }

        /*
         * Decide whether to merge column with previous supernode or not.
         * The candidate for merging is supernode NSuper-2 (the previous
         * one), which must be a child of the current column in the etree.
         */
        childcolcount = 0;
        childoffdiagcnt = 0;
        mergeinefficiency = 0.0;
        fakezerosinnewsupernode = 0;
        createsupernode = ae_false;
        hastheonlychild = ae_false;
        if( nsuper>=2&&tparentnodeofsupernode->ptr.p_int[nsuper-2]==columnidx )
        {
            childcolcount = analysis->supercolrange.ptr.p_int[nsuper-1]-analysis->supercolrange.ptr.p_int[nsuper-2];
            childoffdiagcnt = analysis->superrowridx.ptr.p_int[nsuper-1]-analysis->superrowridx.ptr.p_int[nsuper-2];
            hastheonlychild = tchildrenr->ptr.p_int[columnidx+1]-tchildrenr->ptr.p_int[columnidx]==1;
            if( (hastheonlychild||spchol_relaxedsupernodes)&&colcount+childcolcount<=spchol_maxsupernode )
            {
                /*
                 * K = total element count of the merged supernode (its
                 * triangular part plus off-diagonal rows); the merge
                 * inefficiency is the share of fake (explicitly stored)
                 * zeros in that storage.
                 */
                i = colcount+childcolcount;
                k = i*(i+1)/2+offdiagcnt*i;
                fakezerosinnewsupernode = tfakenonzeros->ptr.p_int[nsuper-2]+tfakenonzeros->ptr.p_int[nsuper-1]+(offdiagcnt-(childoffdiagcnt-1))*childcolcount;
                mergeinefficiency = (double)fakezerosinnewsupernode/(double)k;
                /* Tiny 2-column supernodes are merged with a relaxed absolute tolerance */
                if( colcount+childcolcount==2&&fakezerosinnewsupernode<=spchol_smallfakestolerance )
                {
                    createsupernode = ae_true;
                }
                if( ae_fp_less_eq(mergeinefficiency,spchol_maxmergeinefficiency) )
                {
                    createsupernode = ae_true;
                }
            }
        }

        /*
         * Create supernode if needed
         */
        if( createsupernode )
        {

            /*
             * Create supernode from nodes NSuper-2 and NSuper-1.
             * Because these nodes are in the child-parent relation, we can simply
             * copy nonzero pattern from NSuper-1.
             */
            ae_assert(tparentnodeofsupernode->ptr.p_int[nsuper-2]==columnidx, "CreateSupernodalStructure: integrity check 9472 failed", _state);
            i0 = analysis->superrowridx.ptr.p_int[nsuper-1];
            ii = analysis->superrowridx.ptr.p_int[nsuper]-analysis->superrowridx.ptr.p_int[nsuper-1];
            rfirst = analysis->superrowridx.ptr.p_int[nsuper-2];
            rlast = rfirst+ii;
            for(i=0; i<=ii-1; i++)
            {
                analysis->superrowidx.ptr.p_int[rfirst+i] = analysis->superrowidx.ptr.p_int[i0+i];
            }
            analysis->supercolrange.ptr.p_int[nsuper-1] = columnidx+1;
            analysis->superrowridx.ptr.p_int[nsuper-1] = rlast;
            node2supernode->ptr.p_int[columnidx] = nsuper-2;
            tfakenonzeros->ptr.p_int[nsuper-2] = fakezerosinnewsupernode;
            tparentnodeofsupernode->ptr.p_int[nsuper-2] = parent->ptr.p_int[columnidx];
            nsuper = nsuper-1;

            /*
             * Trace
             */
            if( analysis->dotracesupernodalstructure )
            {
                ae_trace("> merged with supernode S%0d",
                    (int)(nsuper-1));
                if( ae_fp_neq(mergeinefficiency,(double)(0)) )
                {
                    ae_trace(" (%2.0f%% inefficiency)",
                        (double)(mergeinefficiency*100));
                }
                ae_trace("\n*\n");
            }
        }
        else
        {

            /*
             * Trace
             */
            if( analysis->dotracesupernodalstructure )
            {
                ae_trace("> standalone node S%0d created\n*\n",
                    (int)(nsuper-1));
            }
        }
    }
    analysis->nsuper = nsuper;
    ae_assert(analysis->nsuper>=1, "SPSymmAnalyze: integrity check failed (95mgd)", _state);
    ae_assert(analysis->supercolrange.ptr.p_int[0]==0, "SPCholFactorize: integrity check failed (f446s)", _state);
    ae_assert(analysis->supercolrange.ptr.p_int[nsuper]==n, "SPSymmAnalyze: integrity check failed (04ut4)", _state);
    /*
     * Map per-node parent links to per-supernode parent links
     */
    isetallocv(nsuper, -1, &analysis->parentsupernode, _state);
    for(sidx=0; sidx<=nsuper-1; sidx++)
    {
        nodeidx = tparentnodeofsupernode->ptr.p_int[sidx];
        if( nodeidx>=0 )
        {
            nodeidx = node2supernode->ptr.p_int[nodeidx];
            analysis->parentsupernode.ptr.p_int[sidx] = nodeidx;
        }
    }

    /*
     * Allocate supernodal storage: RowOffsets[I] is the offset of supernode
     * I in the flat storage array, computed from per-supernode strides and
     * aligned with spchol_alignpositioninarray().
     */
    ivectorsetlengthatleast(&analysis->rowoffsets, analysis->nsuper+1, _state);
    ivectorsetlengthatleast(&analysis->rowstrides, analysis->nsuper, _state);
    analysis->rowoffsets.ptr.p_int[0] = 0;
    for(i=0; i<=analysis->nsuper-1; i++)
    {
        blocksize = analysis->supercolrange.ptr.p_int[i+1]-analysis->supercolrange.ptr.p_int[i];
        analysis->rowstrides.ptr.p_int[i] = spchol_recommendedstridefor(blocksize, _state);
        analysis->rowoffsets.ptr.p_int[i+1] = analysis->rowoffsets.ptr.p_int[i];
        analysis->rowoffsets.ptr.p_int[i+1] = analysis->rowoffsets.ptr.p_int[i+1]+analysis->rowstrides.ptr.p_int[i]*blocksize;
        analysis->rowoffsets.ptr.p_int[i+1] = analysis->rowoffsets.ptr.p_int[i+1]+analysis->rowstrides.ptr.p_int[i]*(analysis->superrowridx.ptr.p_int[i+1]-analysis->superrowridx.ptr.p_int[i]);
        analysis->rowoffsets.ptr.p_int[i+1] = spchol_alignpositioninarray(analysis->rowoffsets.ptr.p_int[i+1], _state);
    }

    /*
     * Analyze output structure: OutRowCounts[J] accumulates the number of
     * stored elements in output row J (triangular part of its own
     * supernode plus BlockSize elements per off-diagonal occurrence).
     */
    isetallocv(n, 0, &analysis->outrowcounts, _state);
    for(sidx=0; sidx<=nsuper-1; sidx++)
    {
        cols0 = analysis->supercolrange.ptr.p_int[sidx];
        cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
        rfirst = analysis->superrowridx.ptr.p_int[sidx];
        rlast = analysis->superrowridx.ptr.p_int[sidx+1];
        blocksize = cols1-cols0;
        for(j=cols0; j<=cols1-1; j++)
        {
            analysis->outrowcounts.ptr.p_int[j] = analysis->outrowcounts.ptr.p_int[j]+(j-cols0+1);
        }
        for(ii=rfirst; ii<=rlast-1; ii++)
        {
            i0 = analysis->superrowidx.ptr.p_int[ii];
            analysis->outrowcounts.ptr.p_int[i0] = analysis->outrowcounts.ptr.p_int[i0]+blocksize;
        }
    }
}
46858 
46859 
46860 /*************************************************************************
46861 This function analyzes supernodal  structure  and  precomputes  dependency
46862 matrix LAdj+
46863 
46864 INPUT PARAMETERS
46865     Analysis    -   analysis object with completely initialized supernodal
46866                     structure
46867     RawA        -   original (before reordering) input matrix
46868     Node2Supernode- mapping from node to supernode indexes
46869     N           -   problem size
46870 
46871     tTmp0,
46872     tTmp1,
46873     tFlagArray  -   temporary arrays, length at least N+1, simply provide
46874                     preallocated place.
46875 
46876 OUTPUT PARAMETERS
46877     Analysis    -   following fields are initialized:
46878                     * Analysis.LAdjPlus
46879                     * Analysis.LAdjPlusR
46880     Node2Supernode- array[N] that maps node indexes to supernode indexes
46881 
46882   -- ALGLIB PROJECT --
46883      Copyright 05.10.2020 by Bochkanov Sergey.
46884 *************************************************************************/
spchol_analyzesupernodaldependencies(spcholanalysis * analysis,sparsematrix * rawa,ae_vector * node2supernode,ae_int_t n,ae_vector * ttmp0,ae_vector * ttmp1,ae_vector * tflagarray,ae_state * _state)46885 static void spchol_analyzesupernodaldependencies(spcholanalysis* analysis,
46886      sparsematrix* rawa,
46887      /* Integer */ ae_vector* node2supernode,
46888      ae_int_t n,
46889      /* Integer */ ae_vector* ttmp0,
46890      /* Integer */ ae_vector* ttmp1,
46891      /* Boolean */ ae_vector* tflagarray,
46892      ae_state *_state)
46893 {
46894     ae_int_t i;
46895     ae_int_t j;
46896     ae_int_t rowidx;
46897     ae_int_t j0;
46898     ae_int_t j1;
46899     ae_int_t jj;
46900     ae_int_t rfirst;
46901     ae_int_t rlast;
46902     ae_int_t sidx;
46903     ae_int_t uidx;
46904     ae_int_t dbgrank1nodes;
46905     ae_int_t dbgrank2nodes;
46906     ae_int_t dbgrank3nodes;
46907     ae_int_t dbgrank4nodes;
46908     ae_int_t dbgbignodes;
46909     double dbgtotalflop;
46910     double dbgnoscatterflop;
46911     double dbgnorowscatterflop;
46912     double dbgnocolscatterflop;
46913     double dbgcholeskyflop;
46914     double dbgcholesky4flop;
46915     double dbgrank1flop;
46916     double dbgrank4plusflop;
46917     double dbg444flop;
46918     double dbgxx4flop;
46919     double uflop;
46920     ae_int_t wrkrow;
46921     ae_int_t offdiagrow;
46922     ae_int_t lastrow;
46923     ae_int_t uwidth;
46924     ae_int_t uheight;
46925     ae_int_t urank;
46926     ae_int_t theight;
46927     ae_int_t twidth;
46928 
46929 
46930     ae_assert(ttmp0->cnt>=n+1, "AnalyzeSupernodalDependencies: input buffer tTmp0 is too short", _state);
46931     ae_assert(ttmp1->cnt>=n+1, "AnalyzeSupernodalDependencies: input buffer tTmp1 is too short", _state);
46932     ae_assert(tflagarray->cnt>=n+1, "AnalyzeSupernodalDependencies: input buffer tTmp0 is too short", _state);
46933     ae_assert(sparseiscrs(rawa, _state), "AnalyzeSupernodalDependencies: RawA must be CRS matrix", _state);
46934 
46935     /*
46936      * Determine LAdjPlus - supernodes feeding updates to the SIdx-th one.
46937      *
46938      * Without supernodes we have: K-th row of L (also denoted as ladj+(K))
46939      * includes original nonzeros from A (also denoted as ladj(K)) as well
46940      * as all elements on paths in elimination tree from ladj(K) to K.
46941      *
46942      * With supernodes: same principle applied.
46943      */
46944     isetallocv(analysis->nsuper+1, 0, &analysis->ladjplusr, _state);
46945     bsetv(n, ae_true, tflagarray, _state);
46946     analysis->ladjplusr.ptr.p_int[0] = 0;
46947     for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
46948     {
46949 
46950         /*
46951          * Generate list of nodes feeding updates to SIdx-th one
46952          */
46953         ivectorgrowto(&analysis->ladjplus, analysis->ladjplusr.ptr.p_int[sidx]+analysis->nsuper, _state);
46954         rfirst = analysis->ladjplusr.ptr.p_int[sidx];
46955         rlast = rfirst;
46956         for(rowidx=analysis->supercolrange.ptr.p_int[sidx]; rowidx<=analysis->supercolrange.ptr.p_int[sidx+1]-1; rowidx++)
46957         {
46958             i = analysis->invsuperperm.ptr.p_int[rowidx];
46959             j0 = rawa->ridx.ptr.p_int[i];
46960             j1 = rawa->uidx.ptr.p_int[i]-1;
46961             for(jj=j0; jj<=j1; jj++)
46962             {
46963                 j = node2supernode->ptr.p_int[analysis->superperm.ptr.p_int[rawa->idx.ptr.p_int[jj]]];
46964                 if( j<sidx&&tflagarray->ptr.p_bool[j] )
46965                 {
46966                     analysis->ladjplus.ptr.p_int[rlast] = j;
46967                     tflagarray->ptr.p_bool[j] = ae_false;
46968                     rlast = rlast+1;
46969                     j = analysis->parentsupernode.ptr.p_int[j];
46970                     while((j>=0&&j<sidx)&&tflagarray->ptr.p_bool[j])
46971                     {
46972                         analysis->ladjplus.ptr.p_int[rlast] = j;
46973                         tflagarray->ptr.p_bool[j] = ae_false;
46974                         rlast = rlast+1;
46975                         j = analysis->parentsupernode.ptr.p_int[j];
46976                     }
46977                 }
46978             }
46979         }
46980         for(i=rfirst; i<=rlast-1; i++)
46981         {
46982             tflagarray->ptr.p_bool[analysis->ladjplus.ptr.p_int[i]] = ae_true;
46983         }
46984         analysis->ladjplusr.ptr.p_int[sidx+1] = rlast;
46985     }
46986 
46987     /*
46988      * Analyze statistics for trace output
46989      */
46990     if( analysis->dotrace )
46991     {
46992         ae_trace("=== ANALYZING SUPERNODAL DEPENDENCIES ==============================================================\n");
46993         dbgrank1nodes = 0;
46994         dbgrank2nodes = 0;
46995         dbgrank3nodes = 0;
46996         dbgrank4nodes = 0;
46997         dbgbignodes = 0;
46998         dbgtotalflop = (double)(0);
46999         dbgnoscatterflop = (double)(0);
47000         dbgnorowscatterflop = (double)(0);
47001         dbgnocolscatterflop = (double)(0);
47002         dbgrank1flop = (double)(0);
47003         dbgrank4plusflop = (double)(0);
47004         dbg444flop = (double)(0);
47005         dbgxx4flop = (double)(0);
47006         dbgcholeskyflop = (double)(0);
47007         dbgcholesky4flop = (double)(0);
47008         isetv(analysis->nsuper, 0, ttmp0, _state);
47009         for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
47010         {
47011 
47012             /*
47013              * Node sizes
47014              */
47015             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==1 )
47016             {
47017                 inc(&dbgrank1nodes, _state);
47018             }
47019             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==2 )
47020             {
47021                 inc(&dbgrank2nodes, _state);
47022             }
47023             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==3 )
47024             {
47025                 inc(&dbgrank3nodes, _state);
47026             }
47027             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]==4 )
47028             {
47029                 inc(&dbgrank4nodes, _state);
47030             }
47031             if( analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx]>4 )
47032             {
47033                 inc(&dbgbignodes, _state);
47034             }
47035 
47036             /*
47037              * FLOP counts
47038              */
47039             twidth = analysis->supercolrange.ptr.p_int[sidx+1]-analysis->supercolrange.ptr.p_int[sidx];
47040             theight = twidth+(analysis->superrowridx.ptr.p_int[sidx+1]-analysis->superrowridx.ptr.p_int[sidx]);
47041             for(i=analysis->ladjplusr.ptr.p_int[sidx]; i<=analysis->ladjplusr.ptr.p_int[sidx+1]-1; i++)
47042             {
47043                 uidx = analysis->ladjplus.ptr.p_int[i];
47044 
47045                 /*
47046                  * Determine update width, height, rank
47047                  */
47048                 wrkrow = ttmp0->ptr.p_int[uidx];
47049                 offdiagrow = wrkrow;
47050                 lastrow = analysis->superrowridx.ptr.p_int[uidx+1]-analysis->superrowridx.ptr.p_int[uidx];
47051                 while(offdiagrow<lastrow&&analysis->superrowidx.ptr.p_int[analysis->superrowridx.ptr.p_int[uidx]+offdiagrow]<analysis->supercolrange.ptr.p_int[sidx+1])
47052                 {
47053                     offdiagrow = offdiagrow+1;
47054                 }
47055                 uwidth = offdiagrow-wrkrow;
47056                 uheight = lastrow-wrkrow;
47057                 urank = analysis->supercolrange.ptr.p_int[uidx+1]-analysis->supercolrange.ptr.p_int[uidx];
47058                 ttmp0->ptr.p_int[uidx] = offdiagrow;
47059 
47060                 /*
47061                  * Compute update FLOP cost
47062                  */
47063                 uflop = rmul3((double)(uwidth), (double)(uheight), (double)(urank), _state);
47064                 dbgtotalflop = dbgtotalflop+uflop;
47065                 if( uheight==theight&&uwidth==twidth )
47066                 {
47067                     dbgnoscatterflop = dbgnoscatterflop+uflop;
47068                 }
47069                 if( uheight==theight )
47070                 {
47071                     dbgnorowscatterflop = dbgnorowscatterflop+uflop;
47072                 }
47073                 if( uwidth==twidth )
47074                 {
47075                     dbgnocolscatterflop = dbgnocolscatterflop+uflop;
47076                 }
47077                 if( urank==1 )
47078                 {
47079                     dbgrank1flop = dbgrank1flop+uflop;
47080                 }
47081                 if( urank>=4 )
47082                 {
47083                     dbgrank4plusflop = dbgrank4plusflop+uflop;
47084                 }
47085                 if( (urank==4&&uwidth==4)&&twidth==4 )
47086                 {
47087                     dbg444flop = dbg444flop+uflop;
47088                 }
47089                 if( twidth==4 )
47090                 {
47091                     dbgxx4flop = dbgxx4flop+uflop;
47092                 }
47093             }
47094             uflop = (double)(0);
47095             for(i=0; i<=twidth-1; i++)
47096             {
47097                 uflop = uflop+(theight-i)*i+(theight-i);
47098             }
47099             dbgtotalflop = dbgtotalflop+uflop;
47100             dbgcholeskyflop = dbgcholeskyflop+uflop;
47101             if( twidth==4 )
47102             {
47103                 dbgcholesky4flop = dbgcholesky4flop+uflop;
47104             }
47105         }
47106 
47107         /*
47108          * Output
47109          */
47110         ae_trace("> node size statistics:\n");
47111         ae_trace("rank1        = %6d\n",
47112             (int)(dbgrank1nodes));
47113         ae_trace("rank2        = %6d\n",
47114             (int)(dbgrank2nodes));
47115         ae_trace("rank3        = %6d\n",
47116             (int)(dbgrank3nodes));
47117         ae_trace("rank4        = %6d\n",
47118             (int)(dbgrank4nodes));
47119         ae_trace("big nodes    = %6d\n",
47120             (int)(dbgbignodes));
47121         ae_trace("> Total FLOP count (fused multiply-adds):\n");
47122         ae_trace("total        = %8.2f MFLOP\n",
47123             (double)(1.0E-6*dbgtotalflop));
47124         ae_trace("> FLOP counts for updates:\n");
47125         ae_trace("no-sctr      = %8.2f MFLOP    (no row scatter, no col scatter, best case)\n",
47126             (double)(1.0E-6*dbgnoscatterflop));
47127         ae_trace("M4*44->N4    = %8.2f MFLOP    (no col scatter, big blocks, good case)\n",
47128             (double)(1.0E-6*dbg444flop));
47129         ae_trace("no-row-sctr  = %8.2f MFLOP    (no row scatter, good case for col-wise storage)\n",
47130             (double)(1.0E-6*dbgnorowscatterflop));
47131         ae_trace("no-col-sctr  = %8.2f MFLOP    (no col scatter, good case for row-wise storage)\n",
47132             (double)(1.0E-6*dbgnocolscatterflop));
47133         ae_trace("XX*XX->N4    = %8.2f MFLOP\n",
47134             (double)(1.0E-6*dbgxx4flop));
47135         ae_trace("rank1        = %8.2f MFLOP\n",
47136             (double)(1.0E-6*dbgrank1flop));
47137         ae_trace("rank4+       = %8.2f MFLOP\n",
47138             (double)(1.0E-6*dbgrank4plusflop));
47139         ae_trace("> FLOP counts for Cholesky:\n");
47140         ae_trace("cholesky     = %8.2f MFLOP\n",
47141             (double)(1.0E-6*dbgcholeskyflop));
47142         ae_trace("cholesky4    = %8.2f MFLOP\n",
47143             (double)(1.0E-6*dbgcholesky4flop));
47144     }
47145 }
47146 
47147 
47148 /*************************************************************************
47149 This function loads matrix into the supernodal storage.
47150 
47151   -- ALGLIB PROJECT --
47152      Copyright 05.10.2020 by Bochkanov Sergey.
47153 *************************************************************************/
spchol_loadmatrix(spcholanalysis * analysis,sparsematrix * at,ae_state * _state)47154 static void spchol_loadmatrix(spcholanalysis* analysis,
47155      sparsematrix* at,
47156      ae_state *_state)
47157 {
47158     ae_int_t i;
47159     ae_int_t j;
47160     ae_int_t k;
47161     ae_int_t ii;
47162     ae_int_t i0;
47163     ae_int_t i1;
47164     ae_int_t n;
47165     ae_int_t cols0;
47166     ae_int_t cols1;
47167     ae_int_t offss;
47168     ae_int_t sstride;
47169     ae_int_t blocksize;
47170     ae_int_t sidx;
47171 
47172 
47173     n = analysis->n;
47174     iallocv(n, &analysis->raw2smap, _state);
47175     rsetallocv(analysis->rowoffsets.ptr.p_int[analysis->nsuper], 0.0, &analysis->inputstorage, _state);
47176     for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
47177     {
47178         cols0 = analysis->supercolrange.ptr.p_int[sidx];
47179         cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
47180         blocksize = cols1-cols0;
47181         offss = analysis->rowoffsets.ptr.p_int[sidx];
47182         sstride = analysis->rowstrides.ptr.p_int[sidx];
47183 
47184         /*
47185          * Load supernode #SIdx using Raw2SMap to perform quick transformation between global and local indexing.
47186          */
47187         for(i=cols0; i<=cols1-1; i++)
47188         {
47189             analysis->raw2smap.ptr.p_int[i] = i-cols0;
47190         }
47191         for(k=analysis->superrowridx.ptr.p_int[sidx]; k<=analysis->superrowridx.ptr.p_int[sidx+1]-1; k++)
47192         {
47193             analysis->raw2smap.ptr.p_int[analysis->superrowidx.ptr.p_int[k]] = blocksize+(k-analysis->superrowridx.ptr.p_int[sidx]);
47194         }
47195         for(j=cols0; j<=cols1-1; j++)
47196         {
47197             i0 = at->ridx.ptr.p_int[j];
47198             i1 = at->ridx.ptr.p_int[j+1]-1;
47199             for(ii=i0; ii<=i1; ii++)
47200             {
47201                 analysis->inputstorage.ptr.p_double[offss+analysis->raw2smap.ptr.p_int[at->idx.ptr.p_int[ii]]*sstride+(j-cols0)] = at->vals.ptr.p_double[ii];
47202             }
47203         }
47204     }
47205 }
47206 
47207 
47208 /*************************************************************************
47209 This function extracts computed matrix from the supernodal storage.
47210 Depending on settings, a supernodal permutation can be applied to the matrix.
47211 
47212 INPUT PARAMETERS
47213     Analysis    -   analysis object with completely initialized supernodal
47214                     structure
47215     Offsets     -   offsets for supernodal storage
47216     Strides     -   row strides for supernodal storage
47217     RowStorage  -   supernodal storage
47218     DiagD       -   diagonal factor
47219     N           -   problem size
47220 
47221     TmpP        -   preallocated temporary array[N+1]
47222 
47223 OUTPUT PARAMETERS
47224     A           -   sparse matrix in CRS format:
47225                     * for PermType=0, sparse matrix in the original ordering
47226                       (i.e. the matrix is reordered prior to output that
47227                       may require considerable amount of operations due to
47228                       permutation being applied)
47229                     * for PermType=1, sparse matrix in the topological
47230                       ordering. The least overhead for output.
47231     D           -   array[N], diagonal
47232     P           -   output permutation in product form
47233 
47234   -- ALGLIB PROJECT --
47235      Copyright 05.10.2020 by Bochkanov Sergey.
47236 *************************************************************************/
static void spchol_extractmatrix(spcholanalysis* analysis,
     /* Integer */ ae_vector* offsets,
     /* Integer */ ae_vector* strides,
     /* Real    */ ae_vector* rowstorage,
     /* Real    */ ae_vector* diagd,
     ae_int_t n,
     sparsematrix* a,
     /* Real    */ ae_vector* d,
     /* Integer */ ae_vector* p,
     /* Integer */ ae_vector* tmpp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t sidx;
    ae_int_t i0;
    ae_int_t ii;
    ae_int_t rfirst;
    ae_int_t rlast;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t blocksize;
    ae_int_t rowstride;
    ae_int_t offdiagsize;
    ae_int_t offssdiag;


    ae_assert(tmpp->cnt>=n+1, "ExtractMatrix: preallocated temporary TmpP is too short", _state);

    /*
     * Basic initialization: A is an N*N CRS matrix (MatrixType=1)
     */
    a->matrixtype = 1;
    a->n = n;
    a->m = n;

    /*
     * Various permutation types
     */
    if( analysis->applypermutationtooutput )
    {
        ae_assert(analysis->istopologicalordering, "ExtractMatrix: critical integrity check failed (attempt to merge in nontopological permutation)", _state);

        /*
         * Output matrix is topologically permuted, so we return A=L*L' instead of A=P*L*L'*P'.
         * Somewhat inefficient because we have to apply permutation to L returned by supernodal code.
         */
        ivectorsetlengthatleast(&a->ridx, n+1, _state);
        ivectorsetlengthatleast(&a->didx, n, _state);
        a->ridx.ptr.p_int[0] = 0;
        /*
         * Compute row sizes: row I of the output receives the element count of
         * the corresponding row of the internally permuted factor.
         */
        for(i=0; i<=n-1; i++)
        {
            a->ridx.ptr.p_int[i+1] = a->ridx.ptr.p_int[i]+analysis->outrowcounts.ptr.p_int[analysis->effectiveperm.ptr.p_int[i]];
        }
        /*
         * DIdx is temporarily used as a per-row write cursor (initially equal to
         * the row start); it is rebuilt properly by SparseInitDUIdx() below.
         */
        for(i=0; i<=n-1; i++)
        {
            a->didx.ptr.p_int[i] = a->ridx.ptr.p_int[i];
        }
        a->ninitialized = a->ridx.ptr.p_int[n];
        rvectorsetlengthatleast(&a->vals, a->ninitialized, _state);
        ivectorsetlengthatleast(&a->idx, a->ninitialized, _state);
        /*
         * Scatter supernodal storage into CRS rows, mapping both row and column
         * indexes back to the original ordering with InvEffectivePerm.
         */
        for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
        {
            cols0 = analysis->supercolrange.ptr.p_int[sidx];
            cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
            rfirst = analysis->superrowridx.ptr.p_int[sidx];
            rlast = analysis->superrowridx.ptr.p_int[sidx+1];
            blocksize = cols1-cols0;
            offdiagsize = rlast-rfirst;
            rowstride = strides->ptr.p_int[sidx];
            offssdiag = offsets->ptr.p_int[sidx];
            /*
             * Lower triangle of the dense diagonal block
             */
            for(i=0; i<=blocksize-1; i++)
            {
                i0 = analysis->inveffectiveperm.ptr.p_int[cols0+i];
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=i; j++)
                {
                    a->idx.ptr.p_int[ii] = analysis->inveffectiveperm.ptr.p_int[cols0+j];
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+i*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
            /*
             * Off-diagonal rows below the diagonal block
             */
            for(k=0; k<=offdiagsize-1; k++)
            {
                i0 = analysis->inveffectiveperm.ptr.p_int[analysis->superrowidx.ptr.p_int[k+rfirst]];
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=blocksize-1; j++)
                {
                    a->idx.ptr.p_int[ii] = analysis->inveffectiveperm.ptr.p_int[cols0+j];
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+(blocksize+k)*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
        }
        /*
         * The permutation may emit row elements out of column order, so each row
         * is sorted; integrity checks verify that rows are exactly filled and
         * that the last element of every row is the diagonal one.
         */
        for(i=0; i<=n-1; i++)
        {
            ae_assert(a->didx.ptr.p_int[i]==a->ridx.ptr.p_int[i+1], "ExtractMatrix: integrity check failed (9473t)", _state);
            tagsortmiddleir(&a->idx, &a->vals, a->ridx.ptr.p_int[i], a->ridx.ptr.p_int[i+1]-a->ridx.ptr.p_int[i], _state);
            ae_assert(a->idx.ptr.p_int[a->ridx.ptr.p_int[i+1]-1]==i, "ExtractMatrix: integrity check failed (e4tfd)", _state);
        }
        sparseinitduidx(a, _state);

        /*
         * Prepare D[] and P[]: diagonal is permuted back, permutation is identity
         * (it was already merged into the output matrix)
         */
        rvectorsetlengthatleast(d, n, _state);
        ivectorsetlengthatleast(p, n, _state);
        for(i=0; i<=n-1; i++)
        {
            d->ptr.p_double[i] = diagd->ptr.p_double[analysis->effectiveperm.ptr.p_int[i]];
            p->ptr.p_int[i] = i;
        }
    }
    else
    {

        /*
         * The permutation is NOT applied to L prior to extraction,
         * we return both L and P: A=P*L*L'*P'.
         */
        ivectorsetlengthatleast(&a->ridx, n+1, _state);
        ivectorsetlengthatleast(&a->didx, n, _state);
        a->ridx.ptr.p_int[0] = 0;
        for(i=0; i<=n-1; i++)
        {
            a->ridx.ptr.p_int[i+1] = a->ridx.ptr.p_int[i]+analysis->outrowcounts.ptr.p_int[i];
        }
        /*
         * As above, DIdx serves as a per-row write cursor during extraction
         */
        for(i=0; i<=n-1; i++)
        {
            a->didx.ptr.p_int[i] = a->ridx.ptr.p_int[i];
        }
        a->ninitialized = a->ridx.ptr.p_int[n];
        rvectorsetlengthatleast(&a->vals, a->ninitialized, _state);
        ivectorsetlengthatleast(&a->idx, a->ninitialized, _state);
        for(sidx=0; sidx<=analysis->nsuper-1; sidx++)
        {
            cols0 = analysis->supercolrange.ptr.p_int[sidx];
            cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
            rfirst = analysis->superrowridx.ptr.p_int[sidx];
            rlast = analysis->superrowridx.ptr.p_int[sidx+1];
            blocksize = cols1-cols0;
            offdiagsize = rlast-rfirst;
            rowstride = strides->ptr.p_int[sidx];
            offssdiag = offsets->ptr.p_int[sidx];
            /*
             * Lower triangle of the dense diagonal block (no index remapping here)
             */
            for(i=0; i<=blocksize-1; i++)
            {
                i0 = cols0+i;
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=i; j++)
                {
                    a->idx.ptr.p_int[ii] = cols0+j;
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+i*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
            /*
             * Off-diagonal rows below the diagonal block
             */
            for(k=0; k<=offdiagsize-1; k++)
            {
                i0 = analysis->superrowidx.ptr.p_int[k+rfirst];
                ii = a->didx.ptr.p_int[i0];
                for(j=0; j<=blocksize-1; j++)
                {
                    a->idx.ptr.p_int[ii] = cols0+j;
                    a->vals.ptr.p_double[ii] = rowstorage->ptr.p_double[offssdiag+(blocksize+k)*rowstride+j];
                    ii = ii+1;
                }
                a->didx.ptr.p_int[i0] = ii;
            }
        }
        /*
         * NOTE: no per-row sorting in this branch - elements appear to be emitted
         * in increasing column order already; the integrity checks below verify
         * row fill and the trailing diagonal element.
         */
        for(i=0; i<=n-1; i++)
        {
            ae_assert(a->didx.ptr.p_int[i]==a->ridx.ptr.p_int[i+1], "ExtractMatrix: integrity check failed (34e43)", _state);
            ae_assert(a->idx.ptr.p_int[a->ridx.ptr.p_int[i+1]-1]==i, "ExtractMatrix: integrity check failed (k4df5)", _state);
        }
        sparseinitduidx(a, _state);

        /*
         * Extract diagonal
         */
        rvectorsetlengthatleast(d, n, _state);
        for(i=0; i<=n-1; i++)
        {
            d->ptr.p_double[i] = diagd->ptr.p_double[i];
        }

        /*
         * Convert permutation table into product form.
         *
         * NOTE(review): P[] tracks current element locations during the conversion
         * and is finally overwritten with pivot indexes; the exact interplay of
         * P/TmpP below is order-sensitive - confirm against sequential-pivot
         * consumers before modifying.
         */
        ivectorsetlengthatleast(p, n, _state);
        for(i=0; i<=n-1; i++)
        {
            p->ptr.p_int[i] = i;
            tmpp->ptr.p_int[i] = i;
        }
        for(i=0; i<=n-1; i++)
        {

            /*
             * We need to move element K to position I.
             * J is where K actually stored
             */
            k = analysis->inveffectiveperm.ptr.p_int[i];
            j = tmpp->ptr.p_int[k];

            /*
             * Swap elements of P[I:N-1] that is used to store current locations of elements in different way
             */
            i0 = p->ptr.p_int[i];
            p->ptr.p_int[i] = p->ptr.p_int[j];
            p->ptr.p_int[j] = i0;

            /*
             * record pivoting of positions I and J
             */
            p->ptr.p_int[i] = j;
            tmpp->ptr.p_int[i0] = j;
        }
    }
}
47459 
47460 
47461 /*************************************************************************
47462 Sparsity pattern of partial Cholesky.
47463 
47464 This function splits lower triangular L into two parts: leading HEAD  cols
47465 and trailing TAIL*TAIL submatrix. Then it computes sparsity pattern of the
47466 Cholesky decomposition of the HEAD, extracts bottom TAIL*HEAD update matrix
47467 U and applies it to the tail:
47468 
47469     pattern(TAIL) += pattern(U*U')
47470 
47471 The pattern(TAIL) is returned. It is important that pattern(TAIL)  is  not
47472 the sparsity pattern of trailing Cholesky factor, it is the pattern of the
47473 temporary matrix that will be factorized.
47474 
47475 The sparsity pattern of HEAD is NOT returned.
47476 
47477 INPUT PARAMETERS:
47478     A       -   lower triangular  matrix  A whose partial sparsity pattern
47479                 is  needed.  Only  sparsity  structure  matters,  specific
47480                 element values are ignored.
47481     Head,Tail-  sizes of the leading/trailing submatrices
47482 
47483     tmpParent,
47484     tmpChildrenR,
47485     tmpChildrenI,
47486     tmp1,
47487     FlagArray
47488             -   preallocated temporary arrays, length at least Head+Tail
47489     tmpBottomT,
47490     tmpUpdateT,
47491     tmpUpdate-  temporary sparsematrix instances; previously allocated
47492                 space will be reused.
47493 
47494 OUTPUT PARAMETERS:
47495     ATail   -   sparsity pattern of the lower triangular temporary  matrix
47496                 computed prior to Cholesky factorization. Matrix  elements
47497                 are initialized by placeholder values.
47498 
47499   -- ALGLIB PROJECT --
47500      Copyright 21.08.2021 by Bochkanov Sergey.
47501 *************************************************************************/
spchol_partialcholeskypattern(sparsematrix * a,ae_int_t head,ae_int_t tail,sparsematrix * atail,ae_vector * tmpparent,ae_vector * tmpchildrenr,ae_vector * tmpchildreni,ae_vector * tmp1,ae_vector * flagarray,sparsematrix * tmpbottomt,sparsematrix * tmpupdatet,sparsematrix * tmpupdate,sparsematrix * tmpnewtailt,ae_state * _state)47502 static void spchol_partialcholeskypattern(sparsematrix* a,
47503      ae_int_t head,
47504      ae_int_t tail,
47505      sparsematrix* atail,
47506      /* Integer */ ae_vector* tmpparent,
47507      /* Integer */ ae_vector* tmpchildrenr,
47508      /* Integer */ ae_vector* tmpchildreni,
47509      /* Integer */ ae_vector* tmp1,
47510      /* Boolean */ ae_vector* flagarray,
47511      sparsematrix* tmpbottomt,
47512      sparsematrix* tmpupdatet,
47513      sparsematrix* tmpupdate,
47514      sparsematrix* tmpnewtailt,
47515      ae_state *_state)
47516 {
47517     ae_int_t i;
47518     ae_int_t j;
47519     ae_int_t k;
47520     ae_int_t i1;
47521     ae_int_t ii;
47522     ae_int_t j1;
47523     ae_int_t jj;
47524     ae_int_t kb;
47525     ae_int_t cursize;
47526     double v;
47527 
47528 
47529     ae_assert(a->m==head+tail, "PartialCholeskyPattern: rows(A)!=Head+Tail", _state);
47530     ae_assert(a->n==head+tail, "PartialCholeskyPattern: cols(A)!=Head+Tail", _state);
47531     ae_assert(tmpparent->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmpParent)<Head+Tail+1", _state);
47532     ae_assert(tmpchildrenr->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmpChildrenR)<Head+Tail+1", _state);
47533     ae_assert(tmpchildreni->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmpChildrenI)<Head+Tail+1", _state);
47534     ae_assert(tmp1->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmp1)<Head+Tail+1", _state);
47535     ae_assert(flagarray->cnt>=head+tail+1, "PartialCholeskyPattern: Length(tmp1)<Head+Tail+1", _state);
47536     cursize = head+tail;
47537     v = (double)1/(double)cursize;
47538 
47539     /*
47540      * Compute leading Head columns of the Cholesky decomposition of A.
47541      * These columns will be used later to update sparsity pattern of the trailing
47542      * Tail*Tail matrix.
47543      *
47544      * Actually, we need just bottom Tail rows of these columns whose transpose (a
47545      * Head*Tail matrix) is stored in the tmpBottomT matrix. In order to do so in
47546      * the most efficient way we analyze elimination tree of the reordered matrix.
47547      *
47548      * In addition to BOTTOM matrix B we also compute an UPDATE matrix U which does
47549      * not include rows with duplicating sparsity patterns (only parents in the
47550      * elimination tree are included). Using update matrix to compute the sparsity
47551      * pattern is much more efficient because we do not spend time on children columns.
47552      *
47553      * NOTE: because Cholesky decomposition deals with matrix columns, we transpose
47554      *       A, store it into ATail, and work with transposed matrix.
47555      */
47556     sparsecopytransposecrsbuf(a, atail, _state);
47557     spchol_buildunorderedetree(a, cursize, tmpparent, tmp1, _state);
47558     spchol_fromparenttochildren(tmpparent, cursize, tmpchildrenr, tmpchildreni, tmp1, _state);
47559     tmpbottomt->m = head;
47560     tmpbottomt->n = tail;
47561     iallocv(head+1, &tmpbottomt->ridx, _state);
47562     tmpbottomt->ridx.ptr.p_int[0] = 0;
47563     tmpupdatet->m = head;
47564     tmpupdatet->n = tail;
47565     iallocv(head+1, &tmpupdatet->ridx, _state);
47566     tmpupdatet->ridx.ptr.p_int[0] = 0;
47567     bsetv(tail, ae_false, flagarray, _state);
47568     for(j=0; j<=head-1; j++)
47569     {
47570 
47571         /*
47572          * Start J-th row of the tmpBottomT
47573          */
47574         kb = tmpbottomt->ridx.ptr.p_int[j];
47575         igrowv(kb+tail, &tmpbottomt->idx, _state);
47576         rgrowv(kb+tail, &tmpbottomt->vals, _state);
47577 
47578         /*
47579          * copy sparsity pattern J-th column of the reordered matrix
47580          */
47581         jj = atail->didx.ptr.p_int[j];
47582         j1 = atail->ridx.ptr.p_int[j+1]-1;
47583         while(jj<=j1&&atail->idx.ptr.p_int[jj]<head)
47584         {
47585             jj = jj+1;
47586         }
47587         while(jj<=j1)
47588         {
47589             i = atail->idx.ptr.p_int[jj]-head;
47590             tmpbottomt->idx.ptr.p_int[kb] = i;
47591             tmpbottomt->vals.ptr.p_double[kb] = v;
47592             flagarray->ptr.p_bool[i] = ae_true;
47593             kb = kb+1;
47594             jj = jj+1;
47595         }
47596 
47597         /*
47598          * Fetch sparsity pattern from the immediate children in the elimination tree
47599          */
47600         for(jj=tmpchildrenr->ptr.p_int[j]; jj<=tmpchildrenr->ptr.p_int[j+1]-1; jj++)
47601         {
47602             j1 = tmpchildreni->ptr.p_int[jj];
47603             ii = tmpbottomt->ridx.ptr.p_int[j1];
47604             i1 = tmpbottomt->ridx.ptr.p_int[j1+1]-1;
47605             while(ii<=i1)
47606             {
47607                 i = tmpbottomt->idx.ptr.p_int[ii];
47608                 if( !flagarray->ptr.p_bool[i] )
47609                 {
47610                     tmpbottomt->idx.ptr.p_int[kb] = i;
47611                     tmpbottomt->vals.ptr.p_double[kb] = v;
47612                     flagarray->ptr.p_bool[i] = ae_true;
47613                     kb = kb+1;
47614                 }
47615                 ii = ii+1;
47616             }
47617         }
47618 
47619         /*
47620          * Finalize row of tmpBottomT
47621          */
47622         for(ii=tmpbottomt->ridx.ptr.p_int[j]; ii<=kb-1; ii++)
47623         {
47624             flagarray->ptr.p_bool[tmpbottomt->idx.ptr.p_int[ii]] = ae_false;
47625         }
47626         tmpbottomt->ridx.ptr.p_int[j+1] = kb;
47627 
47628         /*
47629          * Only columns that forward their sparsity pattern directly into the tail are added to tmpUpdateT
47630          */
47631         if( tmpparent->ptr.p_int[j]>=head )
47632         {
47633 
47634             /*
47635              * J-th column of the head forwards its sparsity pattern directly into the tail, save it to tmpUpdateT
47636              */
47637             k = tmpupdatet->ridx.ptr.p_int[j];
47638             igrowv(k+tail, &tmpupdatet->idx, _state);
47639             rgrowv(k+tail, &tmpupdatet->vals, _state);
47640             jj = tmpbottomt->ridx.ptr.p_int[j];
47641             j1 = tmpbottomt->ridx.ptr.p_int[j+1]-1;
47642             while(jj<=j1)
47643             {
47644                 i = tmpbottomt->idx.ptr.p_int[jj];
47645                 tmpupdatet->idx.ptr.p_int[k] = i;
47646                 tmpupdatet->vals.ptr.p_double[k] = v;
47647                 k = k+1;
47648                 jj = jj+1;
47649             }
47650             tmpupdatet->ridx.ptr.p_int[j+1] = k;
47651         }
47652         else
47653         {
47654 
47655             /*
47656              * J-th column of the head forwards its sparsity pattern to another column in the head,
47657              * no need to save it to tmpUpdateT. Save empty row.
47658              */
47659             k = tmpupdatet->ridx.ptr.p_int[j];
47660             tmpupdatet->ridx.ptr.p_int[j+1] = k;
47661         }
47662     }
47663     sparsecreatecrsinplace(tmpupdatet, _state);
47664     sparsecopytransposecrsbuf(tmpupdatet, tmpupdate, _state);
47665 
47666     /*
47667      * Apply update U*U' to the trailing Tail*Tail matrix and generate new
47668      * residual matrix in tmpNewTailT. Then transpose/copy it to TmpA[].
47669      */
47670     bsetv(tail, ae_false, flagarray, _state);
47671     tmpnewtailt->m = tail;
47672     tmpnewtailt->n = tail;
47673     iallocv(tail+1, &tmpnewtailt->ridx, _state);
47674     tmpnewtailt->ridx.ptr.p_int[0] = 0;
47675     for(j=0; j<=tail-1; j++)
47676     {
47677         k = tmpnewtailt->ridx.ptr.p_int[j];
47678         igrowv(k+tail, &tmpnewtailt->idx, _state);
47679         rgrowv(k+tail, &tmpnewtailt->vals, _state);
47680 
47681         /*
47682          * Copy row from the reordered/transposed matrix stored in TmpA
47683          */
47684         tmpnewtailt->idx.ptr.p_int[k] = j;
47685         tmpnewtailt->vals.ptr.p_double[k] = (double)(1);
47686         flagarray->ptr.p_bool[j] = ae_true;
47687         k = k+1;
47688         jj = atail->didx.ptr.p_int[head+j]+1;
47689         j1 = atail->ridx.ptr.p_int[head+j+1]-1;
47690         while(jj<=j1)
47691         {
47692             i = atail->idx.ptr.p_int[jj]-head;
47693             tmpnewtailt->idx.ptr.p_int[k] = i;
47694             tmpnewtailt->vals.ptr.p_double[k] = v;
47695             flagarray->ptr.p_bool[i] = ae_true;
47696             k = k+1;
47697             jj = jj+1;
47698         }
47699 
47700         /*
47701          * Apply update U*U' to J-th column of new tail (J-th row of tmpNewTailT):
47702          * * scan J-th row of U
47703          * * for each nonzero element, append corresponding row of U' (elements from J+1-th) to tmpNewTailT
47704          * * FlagArray[] is used to avoid duplication of nonzero elements
47705          */
47706         jj = tmpupdate->ridx.ptr.p_int[j];
47707         j1 = tmpupdate->ridx.ptr.p_int[j+1]-1;
47708         while(jj<=j1)
47709         {
47710 
47711             /*
47712              * Get row of U', skip leading elements up to J-th
47713              */
47714             ii = tmpupdatet->ridx.ptr.p_int[tmpupdate->idx.ptr.p_int[jj]];
47715             i1 = tmpupdatet->ridx.ptr.p_int[tmpupdate->idx.ptr.p_int[jj]+1]-1;
47716             while(ii<=i1&&tmpupdatet->idx.ptr.p_int[ii]<=j)
47717             {
47718                 ii = ii+1;
47719             }
47720 
47721             /*
47722              * Append the rest of the row to tmpNewTailT
47723              */
47724             while(ii<=i1)
47725             {
47726                 i = tmpupdatet->idx.ptr.p_int[ii];
47727                 if( !flagarray->ptr.p_bool[i] )
47728                 {
47729                     tmpnewtailt->idx.ptr.p_int[k] = i;
47730                     tmpnewtailt->vals.ptr.p_double[k] = v;
47731                     flagarray->ptr.p_bool[i] = ae_true;
47732                     k = k+1;
47733                 }
47734                 ii = ii+1;
47735             }
47736 
47737             /*
47738              * Continue or stop early (if we completely filled output buffer)
47739              */
47740             if( k-tmpnewtailt->ridx.ptr.p_int[j]==tail-j )
47741             {
47742                 break;
47743             }
47744             jj = jj+1;
47745         }
47746 
47747         /*
47748          * Finalize:
47749          * * clean up FlagArray[]
47750          * * save K to RIdx[]
47751          */
47752         for(ii=tmpnewtailt->ridx.ptr.p_int[j]; ii<=k-1; ii++)
47753         {
47754             flagarray->ptr.p_bool[tmpnewtailt->idx.ptr.p_int[ii]] = ae_false;
47755         }
47756         tmpnewtailt->ridx.ptr.p_int[j+1] = k;
47757     }
47758     sparsecreatecrsinplace(tmpnewtailt, _state);
47759     sparsecopytransposecrsbuf(tmpnewtailt, atail, _state);
47760 }
47761 
47762 
47763 /*************************************************************************
47764 This function is a specialized version of SparseSymmPermTbl()  that  takes
47765 into   account specifics of topological reorderings (improves performance)
47766 and additionally transposes its output.
47767 
47768 INPUT PARAMETERS
47769     A           -   sparse lower triangular matrix in CRS format.
47770     P           -   array[N] which stores permutation table;  P[I]=J means
47771                     that I-th row/column of matrix  A  is  moved  to  J-th
47772                     position. For performance reasons we do NOT check that
47773                     P[] is  a   correct   permutation  (that there  is  no
47774                     repetitions, just that all its elements  are  in [0,N)
47775                     range.
47776     B           -   sparse matrix object that will hold output.
47777                     Previously allocated memory will be reused as much  as
47778                     possible.
47779 
47780 OUTPUT PARAMETERS
47781     B           -   permuted and transposed upper triangular matrix in the
47782                     special internal CRS-like matrix format (MatrixType=-10082).
47783 
47784   -- ALGLIB PROJECT --
47785      Copyright 05.10.2020 by Bochkanov Sergey.
47786 *************************************************************************/
spchol_topologicalpermutation(sparsematrix * a,ae_vector * p,sparsematrix * b,ae_state * _state)47787 static void spchol_topologicalpermutation(sparsematrix* a,
47788      /* Integer */ ae_vector* p,
47789      sparsematrix* b,
47790      ae_state *_state)
47791 {
47792     ae_int_t i;
47793     ae_int_t j;
47794     ae_int_t jj;
47795     ae_int_t j0;
47796     ae_int_t j1;
47797     ae_int_t k;
47798     ae_int_t k0;
47799     ae_int_t n;
47800     ae_bool bflag;
47801 
47802 
47803     ae_assert(a->matrixtype==1, "TopologicalPermutation: incorrect matrix type (convert your matrix to CRS)", _state);
47804     ae_assert(p->cnt>=a->n, "TopologicalPermutation: Length(P)<N", _state);
47805     ae_assert(a->m==a->n, "TopologicalPermutation: matrix is non-square", _state);
47806     ae_assert(a->ninitialized==a->ridx.ptr.p_int[a->n], "TopologicalPermutation: integrity check failed", _state);
47807     bflag = ae_true;
47808     n = a->n;
47809     for(i=0; i<=n-1; i++)
47810     {
47811         j = p->ptr.p_int[i];
47812         bflag = (bflag&&j>=0)&&j<n;
47813     }
47814     ae_assert(bflag, "TopologicalPermutation: P[] contains values outside of [0,N) range", _state);
47815 
47816     /*
47817      * Prepare output
47818      */
47819     b->matrixtype = -10082;
47820     b->n = n;
47821     b->m = n;
47822     ivectorsetlengthatleast(&b->didx, n, _state);
47823     ivectorsetlengthatleast(&b->uidx, n, _state);
47824 
47825     /*
47826      * Determine row sizes (temporary stored in DIdx) and ranges
47827      */
47828     isetv(n, 0, &b->uidx, _state);
47829     for(i=0; i<=n-1; i++)
47830     {
47831         j0 = a->ridx.ptr.p_int[i];
47832         j1 = a->uidx.ptr.p_int[i]-1;
47833         for(jj=j0; jj<=j1; jj++)
47834         {
47835             j = a->idx.ptr.p_int[jj];
47836             b->uidx.ptr.p_int[j] = b->uidx.ptr.p_int[j]+1;
47837         }
47838     }
47839     for(i=0; i<=n-1; i++)
47840     {
47841         b->didx.ptr.p_int[p->ptr.p_int[i]] = b->uidx.ptr.p_int[i];
47842     }
47843     ivectorsetlengthatleast(&b->ridx, n+1, _state);
47844     b->ridx.ptr.p_int[0] = 0;
47845     for(i=0; i<=n-1; i++)
47846     {
47847         b->ridx.ptr.p_int[i+1] = b->ridx.ptr.p_int[i]+b->didx.ptr.p_int[i];
47848         b->uidx.ptr.p_int[i] = b->ridx.ptr.p_int[i];
47849     }
47850     b->ninitialized = b->ridx.ptr.p_int[n];
47851     ivectorsetlengthatleast(&b->idx, b->ninitialized, _state);
47852     rvectorsetlengthatleast(&b->vals, b->ninitialized, _state);
47853 
47854     /*
47855      * Process matrix
47856      */
47857     for(i=0; i<=n-1; i++)
47858     {
47859         j0 = a->ridx.ptr.p_int[i];
47860         j1 = a->uidx.ptr.p_int[i]-1;
47861         k = p->ptr.p_int[i];
47862         for(jj=j0; jj<=j1; jj++)
47863         {
47864             j = p->ptr.p_int[a->idx.ptr.p_int[jj]];
47865             k0 = b->uidx.ptr.p_int[j];
47866             b->idx.ptr.p_int[k0] = k;
47867             b->vals.ptr.p_double[k0] = a->vals.ptr.p_double[jj];
47868             b->uidx.ptr.p_int[j] = k0+1;
47869         }
47870     }
47871 }
47872 
47873 
/*************************************************************************
Determine nonzero pattern of the column.

This function takes as input:
* A^T - transpose of original input matrix
* index of column of L being computed
* SuperRowRIdx[] and SuperRowIdx[] - arrays that store row structure of
  supernodes, and NSuper - supernodes count
* ChildrenNodesR[], ChildrenNodesI[] - arrays that store children nodes
  for each node
* Node2Supernode[] - array that maps node indexes to supernodes
* TrueArray[] - array[N] that has all of its elements set to True (this
  invariant is preserved on output)
* Tmp0[] - array[N], temporary array

As output, it constructs nonzero pattern (diagonal element  not  included)
of  the  column #ColumnIdx on top  of  SuperRowIdx[]  array,  starting  at
location    SuperRowIdx[SuperRowRIdx[NSuper]]     and    till     location
SuperRowIdx[Result-1], where Result is a function result.

The SuperRowIdx[] array is automatically resized as needed.

It is important that this function computes nonzero pattern, but  it  does
NOT change other supernodal structures. The caller still has  to  finalize
the column (setup supernode ranges, mappings, etc).

  -- ALGLIB routine --
     20.09.2020
     Bochkanov Sergey
*************************************************************************/
static ae_int_t spchol_computenonzeropattern(sparsematrix* wrkat,
     ae_int_t columnidx,
     ae_int_t n,
     /* Integer */ ae_vector* superrowridx,
     /* Integer */ ae_vector* superrowidx,
     ae_int_t nsuper,
     /* Integer */ ae_vector* childrennodesr,
     /* Integer */ ae_vector* childrennodesi,
     /* Integer */ ae_vector* node2supernode,
     /* Boolean */ ae_vector* truearray,
     /* Integer */ ae_vector* tmp0,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t ii;
    ae_int_t jj;
    ae_int_t i0;
    ae_int_t i1;
    ae_int_t j0;
    ae_int_t j1;
    ae_int_t cidx;
    ae_int_t rfirst;
    ae_int_t rlast;
    ae_int_t tfirst;
    ae_int_t tlast;
    ae_int_t supernodalchildrencount;
    ae_int_t result;


    ae_assert(truearray->cnt>=n, "ComputeNonzeroPattern: input temporary is too short", _state);
    ae_assert(tmp0->cnt>=n, "ComputeNonzeroPattern: input temporary is too short", _state);

    /*
     * Determine supernodal children in Tmp0: map every child NODE of
     * ColumnIdx to its SUPERNODE and deduplicate using TrueArray[] as a
     * "not seen yet" flag set. TrueArray[] is restored to all-True right
     * after the scan (invariant promised by the function contract).
     */
    supernodalchildrencount = 0;
    i0 = childrennodesr->ptr.p_int[columnidx];
    i1 = childrennodesr->ptr.p_int[columnidx+1]-1;
    for(ii=i0; ii<=i1; ii++)
    {
        i = node2supernode->ptr.p_int[childrennodesi->ptr.p_int[ii]];
        if( truearray->ptr.p_bool[i] )
        {
            tmp0->ptr.p_int[supernodalchildrencount] = i;
            truearray->ptr.p_bool[i] = ae_false;
            supernodalchildrencount = supernodalchildrencount+1;
        }
    }
    for(i=0; i<=supernodalchildrencount-1; i++)
    {
        truearray->ptr.p_bool[tmp0->ptr.p_int[i]] = ae_true;
    }

    /*
     * Initialize column with the nonzero pattern coming directly from A:
     * copy subdiagonal indexes of ColumnIdx-th row of A^T (the leading +1
     * skips the diagonal element) to SuperRowIdx[RFirst:RLast).
     * The extra N elements at [TFirst,TFirst+N) are scratch space used as
     * a merge buffer by the ordered-merge branch below.
     */
    rfirst = superrowridx->ptr.p_int[nsuper];
    tfirst = rfirst+n;
    igrowv(rfirst+2*n, superrowidx, _state);
    i0 = wrkat->ridx.ptr.p_int[columnidx]+1;
    i1 = wrkat->ridx.ptr.p_int[columnidx+1];
    icopyvx(i1-i0, &wrkat->idx, i0, superrowidx, rfirst, _state);
    rlast = rfirst+(i1-i0);

    /*
     * For column with small number of children use ordered merge algorithm.
     * For column with many children it is better to perform unsorted merge,
     * and then sort the sequence.
     */
    if( supernodalchildrencount<=4 )
    {

        /*
         * Ordered merge. The best approach for small number of children,
         * but may have O(N^2) running time when O(N) children are present.
         */
        for(cidx=0; cidx<=supernodalchildrencount-1; cidx++)
        {

            /*
             * Skip initial elements that do not contribute to subdiagonal nonzero pattern
             */
            i0 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]];
            i1 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]+1]-1;
            while(i0<=i1&&superrowidx->ptr.p_int[i0]<=columnidx)
            {
                i0 = i0+1;
            }
            j0 = rfirst;
            j1 = rlast-1;

            /*
             * Handle degenerate cases: empty merge target or empty merge source.
             */
            if( j1<j0 )
            {
                /* Current pattern is empty: copy the child's pattern verbatim */
                icopyvx(i1-i0+1, superrowidx, i0, superrowidx, rlast, _state);
                rlast = rlast+(i1-i0+1);
                continue;
            }
            if( i1<i0 )
            {
                /* Child contributes nothing below the diagonal block */
                continue;
            }

            /*
             * General case: two non-empty sorted sequences given by [I0,I1] and [J0,J1],
             * have to be merged and stored into [RFirst,RLast).
             *
             * NOTE: the three IF's below intentionally re-check II/JJ that may
             *       have been advanced by a preceding IF within the same pass:
             *       first the smaller head is emitted, then equal heads are
             *       emitted once (deduplication). The loop exits as soon as
             *       either sequence is exhausted; leftovers are appended after
             *       the loop. The merged result is built in the scratch area
             *       [TFirst,TLast) and copied back to [RFirst,RLast) at the end.
             */
            ii = superrowidx->ptr.p_int[i0];
            jj = superrowidx->ptr.p_int[j0];
            tlast = tfirst;
            for(;;)
            {
                if( ii<jj )
                {
                    superrowidx->ptr.p_int[tlast] = ii;
                    tlast = tlast+1;
                    i0 = i0+1;
                    if( i0>i1 )
                    {
                        break;
                    }
                    ii = superrowidx->ptr.p_int[i0];
                }
                if( jj<ii )
                {
                    superrowidx->ptr.p_int[tlast] = jj;
                    tlast = tlast+1;
                    j0 = j0+1;
                    if( j0>j1 )
                    {
                        break;
                    }
                    jj = superrowidx->ptr.p_int[j0];
                }
                if( jj==ii )
                {
                    superrowidx->ptr.p_int[tlast] = ii;
                    tlast = tlast+1;
                    i0 = i0+1;
                    j0 = j0+1;
                    if( i0>i1 )
                    {
                        break;
                    }
                    if( j0>j1 )
                    {
                        break;
                    }
                    ii = superrowidx->ptr.p_int[i0];
                    jj = superrowidx->ptr.p_int[j0];
                }
            }
            /* Append leftovers of whichever sequence was not exhausted */
            for(ii=i0; ii<=i1; ii++)
            {
                superrowidx->ptr.p_int[tlast] = superrowidx->ptr.p_int[ii];
                tlast = tlast+1;
            }
            for(jj=j0; jj<=j1; jj++)
            {
                superrowidx->ptr.p_int[tlast] = superrowidx->ptr.p_int[jj];
                tlast = tlast+1;
            }
            icopyvx(tlast-tfirst, superrowidx, tfirst, superrowidx, rfirst, _state);
            rlast = rfirst+(tlast-tfirst);
        }
        result = rlast;
    }
    else
    {

        /*
         * Unordered merge followed by sort. Guaranteed N*logN worst case.
         * TrueArray[] temporarily marks indexes already present (False =
         * present) so each index is appended at most once; the invariant
         * "all True" is restored before sorting.
         */
        for(ii=rfirst; ii<=rlast-1; ii++)
        {
            truearray->ptr.p_bool[superrowidx->ptr.p_int[ii]] = ae_false;
        }
        for(cidx=0; cidx<=supernodalchildrencount-1; cidx++)
        {

            /*
             * Skip initial elements that do not contribute to subdiagonal nonzero pattern
             */
            i0 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]];
            i1 = superrowridx->ptr.p_int[tmp0->ptr.p_int[cidx]+1]-1;
            while(i0<=i1&&superrowidx->ptr.p_int[i0]<=columnidx)
            {
                i0 = i0+1;
            }

            /*
             * Append elements not present in the sequence
             */
            for(ii=i0; ii<=i1; ii++)
            {
                i = superrowidx->ptr.p_int[ii];
                if( truearray->ptr.p_bool[i] )
                {
                    superrowidx->ptr.p_int[rlast] = i;
                    rlast = rlast+1;
                    truearray->ptr.p_bool[i] = ae_false;
                }
            }
        }
        for(ii=rfirst; ii<=rlast-1; ii++)
        {
            truearray->ptr.p_bool[superrowidx->ptr.p_int[ii]] = ae_true;
        }
        tagsortmiddlei(superrowidx, rfirst, rlast-rfirst, _state);
        result = rlast;
    }
    return result;
}
48119 
48120 
/*************************************************************************
Update target supernode with data from one of its children. This operation
is a supernodal equivalent  of  the  column  update  in  the  left-looking
Cholesky.

The generic update has following form:

    S := S - scatter(U*D*Uc')

where
* S is an tHeight*tWidth rectangular target matrix that is:
  * stored with tStride>=tWidth in RowStorage[OffsS:OffsS+tHeight*tStride-1]
  * lower trapezoidal i.e. its leading tWidth*tWidth  submatrix  is  lower
    triangular. One may update either entire  tWidth*tWidth  submatrix  or
    just its lower part, because upper triangle is not referenced anyway.
  * the height of S is not given because it is not actually needed
* U is an uHeight*uRank rectangular update matrix that is:
  * stored with row stride uStride>=uRank in RowStorage[OffsU:OffsU+uHeight*uStride-1].
* Uc is the leading uWidth*uRank submatrix of U
* D is uRank*uRank diagonal matrix that is:
  * stored in DiagD[OffsD:OffsD+uRank-1]
  * unit, when Analysis.UnitD=True. In this case it can be ignored, although
    DiagD still contains 1's in all of its entries
* uHeight<=tHeight, uWidth<=tWidth, so scatter operation is needed to update
  S with smaller update.
* scatter() is an operation  that  extends  smaller  uHeight*uWidth update
  matrix U*Uc' into larger tHeight*tWidth target matrix by adding zero rows
  and columns into U*Uc':
  * I-th row of update modifies Raw2SMap[SuperRowIdx[URBase+I]]-th row  of
    the matrix S
  * J-th column of update modifies Raw2SMap[SuperRowIdx[URBase+J]]-th  col
    of the matrix S

Returns the index of the first row of the updating supernode that was  NOT
consumed by this update (the new WrkRow for subsequent updates).

  -- ALGLIB routine --
     20.09.2020
     Bochkanov Sergey
*************************************************************************/
static ae_int_t spchol_updatesupernode(spcholanalysis* analysis,
     ae_int_t sidx,
     ae_int_t cols0,
     ae_int_t cols1,
     ae_int_t offss,
     /* Integer */ ae_vector* raw2smap,
     ae_int_t uidx,
     ae_int_t wrkrow,
     /* Real    */ ae_vector* diagd,
     ae_int_t offsd,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t colu0;
    ae_int_t colu1;
    ae_int_t urbase;
    ae_int_t urlast;
    ae_int_t urank;
    ae_int_t uwidth;
    ae_int_t uheight;
    ae_int_t urowstride;
    ae_int_t twidth;
    ae_int_t theight;
    ae_int_t trowstride;
    ae_int_t targetrow;
    ae_int_t targetcol;
    ae_int_t offsu;
    ae_int_t offdiagrow;
    ae_int_t lastrow;
    ae_int_t offs0;
    ae_int_t offsj;
    ae_int_t offsk;
    double v;
    ae_int_t result;


    /*
     * Fetch geometry of the target supernode SIdx and the updating
     * supernode UIdx from the analysis structure.
     */
    twidth = cols1-cols0;
    theight = twidth+(analysis->superrowridx.ptr.p_int[sidx+1]-analysis->superrowridx.ptr.p_int[sidx]);
    offsu = analysis->rowoffsets.ptr.p_int[uidx];
    colu0 = analysis->supercolrange.ptr.p_int[uidx];
    colu1 = analysis->supercolrange.ptr.p_int[uidx+1];
    urbase = analysis->superrowridx.ptr.p_int[uidx];
    urlast = analysis->superrowridx.ptr.p_int[uidx+1];
    urank = colu1-colu0;
    trowstride = analysis->rowstrides.ptr.p_int[sidx];
    urowstride = analysis->rowstrides.ptr.p_int[uidx];

    /*
     * Skip leading uRank+WrkRow rows of U because they are not used.
     */
    offsu = offsu+(colu1-colu0+wrkrow)*urowstride;

    /*
     * Analyze range of rows in supernode LAdjPlus[II] and determine two subranges:
     * * one with indexes stored at SuperRowIdx[WrkRow:OffdiagRow);
     *   these indexes are the ones that intersect with range of rows/columns [ColS0,ColS1)
     *   occupied by diagonal block of the supernode SIdx
     * * one with indexes stored at SuperRowIdx[OffdiagRow:LastRow);
     *   these indexes are ones that intersect with range of rows occupied by
     *   offdiagonal block of the supernode SIdx
     */
    if( analysis->extendeddebug )
    {
        ae_assert(analysis->superrowidx.ptr.p_int[urbase+wrkrow]>=cols0, "SPCholFactorize: integrity check 6378 failed", _state);
        ae_assert(analysis->superrowidx.ptr.p_int[urbase+wrkrow]<cols1, "SPCholFactorize: integrity check 6729 failed", _state);
    }
    offdiagrow = wrkrow;
    lastrow = urlast-urbase;
    while(offdiagrow<lastrow&&analysis->superrowidx.ptr.p_int[offdiagrow+urbase]<cols1)
    {
        offdiagrow = offdiagrow+1;
    }
    uwidth = offdiagrow-wrkrow;
    uheight = lastrow-wrkrow;
    result = offdiagrow;
    if( analysis->extendeddebug )
    {

        /*
         * Extended integrity check (if requested)
         */
        ae_assert(wrkrow<offdiagrow&&analysis->superrowidx.ptr.p_int[wrkrow+urbase]>=cols0, "SPCholFactorize: integrity check failed (44trg6)", _state);
        for(i=wrkrow; i<=lastrow-1; i++)
        {
            ae_assert(raw2smap->ptr.p_int[analysis->superrowidx.ptr.p_int[i+urbase]]>=0, "SPCholFactorize: integrity check failed (43t63)", _state);
        }
    }

    /*
     * Handle special cases: try specialized update kernels first; each
     * kernel returns True when it was able to perform the update, in which
     * case we are done. Otherwise fall through to the reference code below.
     */
    if( trowstride==4 )
    {

        /*
         * Target is stride-4 column, try several kernels that may work with tWidth=3 and tWidth=4
         */
        if( ((uwidth==4&&twidth==4)&&urank==4)&&urowstride==4 )
        {
            if( spchol_updatekernel4444(&analysis->outputstorage, offss, theight, offsu, uheight, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
            {
                return result;
            }
        }
        if( spchol_updatekernelabc4(&analysis->outputstorage, offss, twidth, offsu, uheight, urank, urowstride, uwidth, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
        {
            return result;
        }
    }
    if( urank==1&&urowstride==1 )
    {
        if( spchol_updatekernelrank1(&analysis->outputstorage, offss, twidth, trowstride, offsu, uheight, uwidth, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
        {
            return result;
        }
    }
    if( urank==2&&urowstride==2 )
    {
        if( spchol_updatekernelrank2(&analysis->outputstorage, offss, twidth, trowstride, offsu, uheight, uwidth, &analysis->diagd, colu0, raw2smap, &analysis->superrowidx, urbase+wrkrow, _state) )
        {
            return result;
        }
    }

    /*
     * Handle general update, reference code.
     * U2SMap[I] maps I-th row of the (trimmed) update matrix to the
     * corresponding row of the target supernode S.
     */
    ivectorsetlengthatleast(&analysis->u2smap, uheight, _state);
    for(i=0; i<=uheight-1; i++)
    {
        analysis->u2smap.ptr.p_int[i] = raw2smap->ptr.p_int[analysis->superrowidx.ptr.p_int[urbase+wrkrow+i]];
    }
    if( analysis->unitd )
    {

        /*
         * Unit D, vanilla Cholesky: S[k,j] -= sum_i U[j,i]*U[k,i]
         */
        for(k=0; k<=uheight-1; k++)
        {
            targetrow = offss+analysis->u2smap.ptr.p_int[k]*trowstride;
            for(j=0; j<=uwidth-1; j++)
            {
                targetcol = analysis->u2smap.ptr.p_int[j];
                offsj = offsu+j*urowstride;
                offsk = offsu+k*urowstride;
                offs0 = targetrow+targetcol;
                v = analysis->outputstorage.ptr.p_double[offs0];
                for(i=0; i<=urank-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offsj+i]*analysis->outputstorage.ptr.p_double[offsk+i];
                }
                analysis->outputstorage.ptr.p_double[offs0] = v;
            }
        }
    }
    else
    {

        /*
         * Non-unit D, LDLT decomposition: S[k,j] -= sum_i U[j,i]*D[i]*U[k,i]
         */
        for(k=0; k<=uheight-1; k++)
        {
            targetrow = offss+analysis->u2smap.ptr.p_int[k]*trowstride;
            for(j=0; j<=uwidth-1; j++)
            {
                targetcol = analysis->u2smap.ptr.p_int[j];
                offsj = offsu+j*urowstride;
                offsk = offsu+k*urowstride;
                offs0 = targetrow+targetcol;
                v = analysis->outputstorage.ptr.p_double[offs0];
                for(i=0; i<=urank-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offsj+i]*diagd->ptr.p_double[offsd+i]*analysis->outputstorage.ptr.p_double[offsk+i];
                }
                analysis->outputstorage.ptr.p_double[offs0] = v;
            }
        }
    }
    return result;
}
48342 
48343 
48344 /*************************************************************************
48345 Factorizes target supernode, returns True on success, False on failure.
48346 
48347   -- ALGLIB routine --
48348      20.09.2020
48349      Bochkanov Sergey
48350 *************************************************************************/
static ae_bool spchol_factorizesupernode(spcholanalysis* analysis,
     ae_int_t sidx,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t cols0;
    ae_int_t cols1;
    ae_int_t offss;
    ae_int_t blocksize;
    ae_int_t offdiagsize;
    ae_int_t sstride;
    double v;
    double vs;
    double possignvraw;
    ae_bool controlpivot;
    ae_bool controloverflow;
    ae_bool result;


    /*
     * Load supernode geometry:
     * * [Cols0,Cols1)  - column range covered by supernode SIdx
     * * OffSS          - offset of supernode storage in OutputStorage
     * * BlockSize      - number of columns in the supernode
     * * OffDiagSize    - number of off-diagonal rows stored below the
     *                    dense diagonal block
     * * SStride        - row stride of the supernode storage
     */
    cols0 = analysis->supercolrange.ptr.p_int[sidx];
    cols1 = analysis->supercolrange.ptr.p_int[sidx+1];
    offss = analysis->rowoffsets.ptr.p_int[sidx];
    blocksize = cols1-cols0;
    offdiagsize = analysis->superrowridx.ptr.p_int[sidx+1]-analysis->superrowridx.ptr.p_int[sidx];
    sstride = analysis->rowstrides.ptr.p_int[sidx];

    /*
     * Modified factorization controls, activated only for ModType=1:
     * * ControlPivot    - ModParam0>0 activates pivot modification
     * * ControlOverflow - ModParam1>0 activates growth/overflow check
     */
    controlpivot = analysis->modtype==1&&ae_fp_greater(analysis->modparam0,(double)(0));
    controloverflow = analysis->modtype==1&&ae_fp_greater(analysis->modparam1,(double)(0));
    if( analysis->unitd )
    {

        /*
         * Classic Cholesky (unit D): factorize dense block column by column,
         * updating the off-diagonal rows along the way.
         */
        for(j=0; j<=blocksize-1; j++)
        {

            /*
             * Compute J-th column: subtract contributions of previously
             * factorized columns I<J. VS accumulates sum of magnitudes
             * of the column, used below by the overflow check.
             */
            vs = (double)(0);
            for(k=j; k<=blocksize+offdiagsize-1; k++)
            {
                v = analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                for(i=0; i<=j-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offss+k*sstride+i]*analysis->outputstorage.ptr.p_double[offss+j*sstride+i];
                }
                analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v;
                vs = vs+ae_fabs(v, _state);
            }
            if( controloverflow&&ae_fp_greater(vs,analysis->modparam1) )
            {

                /*
                 * Possible failure due to accumulation of numerical errors
                 */
                result = ae_false;
                return result;
            }

            /*
             * Handle pivot element
             */
            v = analysis->outputstorage.ptr.p_double[offss+j*sstride+j];
            if( controlpivot&&ae_fp_less_eq(v,analysis->modparam0) )
            {

                /*
                 * Basic modified Cholesky: pivot is too small (or negative),
                 * replace it by sqrt(ModParam0) and rescale the column.
                 */
                v = ae_sqrt(analysis->modparam0, _state);
                analysis->diagd.ptr.p_double[cols0+j] = 1.0;
                analysis->outputstorage.ptr.p_double[offss+j*sstride+j] = v;
                v = 1/v;
                for(k=j+1; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
            else
            {

                /*
                 * Default case: non-positive pivot without modification
                 * means the matrix is not positive definite - fail.
                 */
                if( ae_fp_less_eq(v,(double)(0)) )
                {
                    result = ae_false;
                    return result;
                }
                analysis->diagd.ptr.p_double[cols0+j] = 1.0;
                v = 1/ae_sqrt(v, _state);
                for(k=j; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
        }
    }
    else
    {

        /*
         * LDLT with diagonal D (allows indefinite pivots)
         */
        for(j=0; j<=blocksize-1; j++)
        {

            /*
             * Compute J-th column, scaling each subtracted term by the
             * corresponding entry of D. VS accumulates sum of magnitudes
             * for the overflow check.
             */
            vs = (double)(0);
            for(k=j; k<=blocksize+offdiagsize-1; k++)
            {
                v = analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                for(i=0; i<=j-1; i++)
                {
                    v = v-analysis->outputstorage.ptr.p_double[offss+k*sstride+i]*analysis->diagd.ptr.p_double[cols0+i]*analysis->outputstorage.ptr.p_double[offss+j*sstride+i];
                }
                analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v;
                vs = vs+ae_fabs(v, _state);
            }
            if( controloverflow&&ae_fp_greater(vs,analysis->modparam1) )
            {

                /*
                 * Possible failure due to accumulation of numerical errors
                 */
                result = ae_false;
                return result;
            }

            /*
             * Handle pivot element. PosSignVRaw is the sign (+1/-1) of the
             * ORIGINAL (unfactorized) diagonal element; it is used so that
             * the modified pivot keeps the sign of the raw input.
             */
            possignvraw = possign(analysis->inputstorage.ptr.p_double[offss+j*sstride+j], _state);
            v = analysis->outputstorage.ptr.p_double[offss+j*sstride+j];
            if( controlpivot&&ae_fp_less_eq(v/possignvraw,analysis->modparam0) )
            {

                /*
                 * Basic modified LDLT: pivot magnitude (relative to raw
                 * sign) is too small, clamp it to +/-ModParam0.
                 */
                v = possignvraw*analysis->modparam0;
                analysis->diagd.ptr.p_double[cols0+j] = v;
                analysis->outputstorage.ptr.p_double[offss+j*sstride+j] = 1.0;
                v = 1/v;
                for(k=j+1; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
            else
            {

                /*
                 * Unmodified LDLT: exactly zero pivot is a hard failure.
                 */
                if( ae_fp_eq(v,(double)(0)) )
                {
                    result = ae_false;
                    return result;
                }
                analysis->diagd.ptr.p_double[cols0+j] = v;
                v = 1/v;
                for(k=j; k<=blocksize+offdiagsize-1; k++)
                {
                    analysis->outputstorage.ptr.p_double[offss+k*sstride+j] = v*analysis->outputstorage.ptr.p_double[offss+k*sstride+j];
                }
            }
        }
    }
    result = ae_true;
    return result;
}
48528 
48529 
48530 /*************************************************************************
48531 This function returns recommended stride for given row size
48532 
48533   -- ALGLIB routine --
48534      20.10.2020
48535      Bochkanov Sergey
48536 *************************************************************************/
spchol_recommendedstridefor(ae_int_t rowsize,ae_state * _state)48537 static ae_int_t spchol_recommendedstridefor(ae_int_t rowsize,
48538      ae_state *_state)
48539 {
48540     ae_int_t result;
48541 
48542 
48543     result = rowsize;
48544     if( rowsize==3 )
48545     {
48546         result = 4;
48547     }
48548     return result;
48549 }
48550 
48551 
48552 /*************************************************************************
48553 This function aligns position in array in order to  better  accommodate to
48554 SIMD specifics.
48555 
48556 NOTE: this function aligns position measured in double precision  numbers,
48557       not in bits or bytes. If you want to have 256-bit aligned  position,
48558       round Offs to nearest multiple of 4 that is not less than Offs.
48559 
48560   -- ALGLIB routine --
48561      20.10.2020
48562      Bochkanov Sergey
48563 *************************************************************************/
spchol_alignpositioninarray(ae_int_t offs,ae_state * _state)48564 static ae_int_t spchol_alignpositioninarray(ae_int_t offs,
48565      ae_state *_state)
48566 {
48567     ae_int_t result;
48568 
48569 
48570     result = offs;
48571     if( offs%4!=0 )
48572     {
48573         result = result+(4-offs%4);
48574     }
48575     return result;
48576 }
48577 
48578 
48579 #ifdef ALGLIB_NO_FAST_KERNELS
48580 /*************************************************************************
48581 Fast kernels for small supernodal updates: special 4x4x4x4 function.
48582 
48583 ! See comments on UpdateSupernode() for information  on generic supernodal
48584 ! updates, including notation used below.
48585 
48586 The generic update has following form:
48587 
48588     S := S - scatter(U*D*Uc')
48589 
48590 This specialized function performs 4x4x4x4 update, i.e.:
48591 * S is a tHeight*4 matrix
48592 * U is a uHeight*4 matrix
48593 * Uc' is a 4*4 matrix
48594 * scatter() scatters rows of U*Uc', but does not scatter columns (they are
48595   densely packed).
48596 
48597 Return value:
48598 * True if update was applied
48599 * False if kernel refused to perform an update.
48600 
48601   -- ALGLIB routine --
48602      20.09.2020
48603      Bochkanov Sergey
48604 *************************************************************************/
spchol_updatekernel4444(ae_vector * rowstorage,ae_int_t offss,ae_int_t sheight,ae_int_t offsu,ae_int_t uheight,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)48605 static ae_bool spchol_updatekernel4444(/* Real    */ ae_vector* rowstorage,
48606      ae_int_t offss,
48607      ae_int_t sheight,
48608      ae_int_t offsu,
48609      ae_int_t uheight,
48610      /* Real    */ ae_vector* diagd,
48611      ae_int_t offsd,
48612      /* Integer */ ae_vector* raw2smap,
48613      /* Integer */ ae_vector* superrowidx,
48614      ae_int_t urbase,
48615      ae_state *_state)
48616 {
48617     ae_int_t k;
48618     ae_int_t targetrow;
48619     ae_int_t offsk;
48620     double d0;
48621     double d1;
48622     double d2;
48623     double d3;
48624     double u00;
48625     double u01;
48626     double u02;
48627     double u03;
48628     double u10;
48629     double u11;
48630     double u12;
48631     double u13;
48632     double u20;
48633     double u21;
48634     double u22;
48635     double u23;
48636     double u30;
48637     double u31;
48638     double u32;
48639     double u33;
48640     double uk0;
48641     double uk1;
48642     double uk2;
48643     double uk3;
48644     ae_bool result;
48645 
48646 
48647     d0 = diagd->ptr.p_double[offsd+0];
48648     d1 = diagd->ptr.p_double[offsd+1];
48649     d2 = diagd->ptr.p_double[offsd+2];
48650     d3 = diagd->ptr.p_double[offsd+3];
48651     u00 = d0*rowstorage->ptr.p_double[offsu+0*4+0];
48652     u01 = d1*rowstorage->ptr.p_double[offsu+0*4+1];
48653     u02 = d2*rowstorage->ptr.p_double[offsu+0*4+2];
48654     u03 = d3*rowstorage->ptr.p_double[offsu+0*4+3];
48655     u10 = d0*rowstorage->ptr.p_double[offsu+1*4+0];
48656     u11 = d1*rowstorage->ptr.p_double[offsu+1*4+1];
48657     u12 = d2*rowstorage->ptr.p_double[offsu+1*4+2];
48658     u13 = d3*rowstorage->ptr.p_double[offsu+1*4+3];
48659     u20 = d0*rowstorage->ptr.p_double[offsu+2*4+0];
48660     u21 = d1*rowstorage->ptr.p_double[offsu+2*4+1];
48661     u22 = d2*rowstorage->ptr.p_double[offsu+2*4+2];
48662     u23 = d3*rowstorage->ptr.p_double[offsu+2*4+3];
48663     u30 = d0*rowstorage->ptr.p_double[offsu+3*4+0];
48664     u31 = d1*rowstorage->ptr.p_double[offsu+3*4+1];
48665     u32 = d2*rowstorage->ptr.p_double[offsu+3*4+2];
48666     u33 = d3*rowstorage->ptr.p_double[offsu+3*4+3];
48667     for(k=0; k<=uheight-1; k++)
48668     {
48669         targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48670         offsk = offsu+k*4;
48671         uk0 = rowstorage->ptr.p_double[offsk+0];
48672         uk1 = rowstorage->ptr.p_double[offsk+1];
48673         uk2 = rowstorage->ptr.p_double[offsk+2];
48674         uk3 = rowstorage->ptr.p_double[offsk+3];
48675         rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
48676         rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
48677         rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
48678         rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
48679     }
48680     result = ae_true;
48681     return result;
48682 }
48683 #endif
48684 
48685 
48686 #ifdef ALGLIB_NO_FAST_KERNELS
48687 /*************************************************************************
Fast kernels for small supernodal updates: special AxBxCx4 function.
48689 
48690 ! See comments on UpdateSupernode() for information  on generic supernodal
48691 ! updates, including notation used below.
48692 
48693 The generic update has following form:
48694 
48695     S := S - scatter(U*D*Uc')
48696 
48697 This specialized function performs AxBxCx4 update, i.e.:
48698 * S is a tHeight*A matrix with row stride equal to 4 (usually it means that
48699   it has 3 or 4 columns)
48700 * U is a uHeight*B matrix
48701 * Uc' is a B*C matrix, with C<=A
48702 * scatter() scatters rows and columns of U*Uc'
48703 
48704 Return value:
48705 * True if update was applied
48706 * False if kernel refused to perform an update (quick exit for unsupported
48707   combinations of input sizes)
48708 
48709   -- ALGLIB routine --
48710      20.09.2020
48711      Bochkanov Sergey
48712 *************************************************************************/
spchol_updatekernelabc4(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t offsu,ae_int_t uheight,ae_int_t urank,ae_int_t urowstride,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)48713 static ae_bool spchol_updatekernelabc4(/* Real    */ ae_vector* rowstorage,
48714      ae_int_t offss,
48715      ae_int_t twidth,
48716      ae_int_t offsu,
48717      ae_int_t uheight,
48718      ae_int_t urank,
48719      ae_int_t urowstride,
48720      ae_int_t uwidth,
48721      /* Real    */ ae_vector* diagd,
48722      ae_int_t offsd,
48723      /* Integer */ ae_vector* raw2smap,
48724      /* Integer */ ae_vector* superrowidx,
48725      ae_int_t urbase,
48726      ae_state *_state)
48727 {
48728     ae_int_t k;
48729     ae_int_t targetrow;
48730     ae_int_t targetcol;
48731     ae_int_t offsk;
48732     double d0;
48733     double d1;
48734     double d2;
48735     double d3;
48736     double u00;
48737     double u01;
48738     double u02;
48739     double u03;
48740     double u10;
48741     double u11;
48742     double u12;
48743     double u13;
48744     double u20;
48745     double u21;
48746     double u22;
48747     double u23;
48748     double u30;
48749     double u31;
48750     double u32;
48751     double u33;
48752     double uk0;
48753     double uk1;
48754     double uk2;
48755     double uk3;
48756     ae_int_t srccol0;
48757     ae_int_t srccol1;
48758     ae_int_t srccol2;
48759     ae_int_t srccol3;
48760     ae_bool result;
48761 
48762 
48763 
48764     /*
48765      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
48766      */
48767     result = ae_false;
48768     if( twidth<3||twidth>4 )
48769     {
48770         return result;
48771     }
48772     if( uwidth<3||uwidth>4 )
48773     {
48774         return result;
48775     }
48776     if( urank>4 )
48777     {
48778         return result;
48779     }
48780 
48781     /*
48782      * Determine source columns for target columns, -1 if target column
48783      * is not updated.
48784      */
48785     srccol0 = -1;
48786     srccol1 = -1;
48787     srccol2 = -1;
48788     srccol3 = -1;
48789     for(k=0; k<=uwidth-1; k++)
48790     {
48791         targetcol = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]];
48792         if( targetcol==0 )
48793         {
48794             srccol0 = k;
48795         }
48796         if( targetcol==1 )
48797         {
48798             srccol1 = k;
48799         }
48800         if( targetcol==2 )
48801         {
48802             srccol2 = k;
48803         }
48804         if( targetcol==3 )
48805         {
48806             srccol3 = k;
48807         }
48808     }
48809 
48810     /*
48811      * Load update matrix into aligned/rearranged 4x4 storage
48812      */
48813     d0 = (double)(0);
48814     d1 = (double)(0);
48815     d2 = (double)(0);
48816     d3 = (double)(0);
48817     u00 = (double)(0);
48818     u01 = (double)(0);
48819     u02 = (double)(0);
48820     u03 = (double)(0);
48821     u10 = (double)(0);
48822     u11 = (double)(0);
48823     u12 = (double)(0);
48824     u13 = (double)(0);
48825     u20 = (double)(0);
48826     u21 = (double)(0);
48827     u22 = (double)(0);
48828     u23 = (double)(0);
48829     u30 = (double)(0);
48830     u31 = (double)(0);
48831     u32 = (double)(0);
48832     u33 = (double)(0);
48833     if( urank>=1 )
48834     {
48835         d0 = diagd->ptr.p_double[offsd+0];
48836     }
48837     if( urank>=2 )
48838     {
48839         d1 = diagd->ptr.p_double[offsd+1];
48840     }
48841     if( urank>=3 )
48842     {
48843         d2 = diagd->ptr.p_double[offsd+2];
48844     }
48845     if( urank>=4 )
48846     {
48847         d3 = diagd->ptr.p_double[offsd+3];
48848     }
48849     if( srccol0>=0 )
48850     {
48851         if( urank>=1 )
48852         {
48853             u00 = d0*rowstorage->ptr.p_double[offsu+srccol0*urowstride+0];
48854         }
48855         if( urank>=2 )
48856         {
48857             u01 = d1*rowstorage->ptr.p_double[offsu+srccol0*urowstride+1];
48858         }
48859         if( urank>=3 )
48860         {
48861             u02 = d2*rowstorage->ptr.p_double[offsu+srccol0*urowstride+2];
48862         }
48863         if( urank>=4 )
48864         {
48865             u03 = d3*rowstorage->ptr.p_double[offsu+srccol0*urowstride+3];
48866         }
48867     }
48868     if( srccol1>=0 )
48869     {
48870         if( urank>=1 )
48871         {
48872             u10 = d0*rowstorage->ptr.p_double[offsu+srccol1*urowstride+0];
48873         }
48874         if( urank>=2 )
48875         {
48876             u11 = d1*rowstorage->ptr.p_double[offsu+srccol1*urowstride+1];
48877         }
48878         if( urank>=3 )
48879         {
48880             u12 = d2*rowstorage->ptr.p_double[offsu+srccol1*urowstride+2];
48881         }
48882         if( urank>=4 )
48883         {
48884             u13 = d3*rowstorage->ptr.p_double[offsu+srccol1*urowstride+3];
48885         }
48886     }
48887     if( srccol2>=0 )
48888     {
48889         if( urank>=1 )
48890         {
48891             u20 = d0*rowstorage->ptr.p_double[offsu+srccol2*urowstride+0];
48892         }
48893         if( urank>=2 )
48894         {
48895             u21 = d1*rowstorage->ptr.p_double[offsu+srccol2*urowstride+1];
48896         }
48897         if( urank>=3 )
48898         {
48899             u22 = d2*rowstorage->ptr.p_double[offsu+srccol2*urowstride+2];
48900         }
48901         if( urank>=4 )
48902         {
48903             u23 = d3*rowstorage->ptr.p_double[offsu+srccol2*urowstride+3];
48904         }
48905     }
48906     if( srccol3>=0 )
48907     {
48908         if( urank>=1 )
48909         {
48910             u30 = d0*rowstorage->ptr.p_double[offsu+srccol3*urowstride+0];
48911         }
48912         if( urank>=2 )
48913         {
48914             u31 = d1*rowstorage->ptr.p_double[offsu+srccol3*urowstride+1];
48915         }
48916         if( urank>=3 )
48917         {
48918             u32 = d2*rowstorage->ptr.p_double[offsu+srccol3*urowstride+2];
48919         }
48920         if( urank>=4 )
48921         {
48922             u33 = d3*rowstorage->ptr.p_double[offsu+srccol3*urowstride+3];
48923         }
48924     }
48925 
48926     /*
48927      * Run update
48928      */
48929     if( urank==1 )
48930     {
48931         for(k=0; k<=uheight-1; k++)
48932         {
48933             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48934             offsk = offsu+k*urowstride;
48935             uk0 = rowstorage->ptr.p_double[offsk+0];
48936             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0;
48937             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0;
48938             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0;
48939             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0;
48940         }
48941     }
48942     if( urank==2 )
48943     {
48944         for(k=0; k<=uheight-1; k++)
48945         {
48946             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48947             offsk = offsu+k*urowstride;
48948             uk0 = rowstorage->ptr.p_double[offsk+0];
48949             uk1 = rowstorage->ptr.p_double[offsk+1];
48950             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1;
48951             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1;
48952             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1;
48953             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1;
48954         }
48955     }
48956     if( urank==3 )
48957     {
48958         for(k=0; k<=uheight-1; k++)
48959         {
48960             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48961             offsk = offsu+k*urowstride;
48962             uk0 = rowstorage->ptr.p_double[offsk+0];
48963             uk1 = rowstorage->ptr.p_double[offsk+1];
48964             uk2 = rowstorage->ptr.p_double[offsk+2];
48965             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2;
48966             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2;
48967             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2;
48968             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2;
48969         }
48970     }
48971     if( urank==4 )
48972     {
48973         for(k=0; k<=uheight-1; k++)
48974         {
48975             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
48976             offsk = offsu+k*urowstride;
48977             uk0 = rowstorage->ptr.p_double[offsk+0];
48978             uk1 = rowstorage->ptr.p_double[offsk+1];
48979             uk2 = rowstorage->ptr.p_double[offsk+2];
48980             uk3 = rowstorage->ptr.p_double[offsk+3];
48981             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
48982             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
48983             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
48984             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
48985         }
48986     }
48987     result = ae_true;
48988     return result;
48989 }
48990 #endif
48991 
48992 
48993 /*************************************************************************
48994 Fast kernels for small supernodal updates: special rank-1 function.
48995 
48996 ! See comments on UpdateSupernode() for information  on generic supernodal
48997 ! updates, including notation used below.
48998 
48999 The generic update has following form:
49000 
49001     S := S - scatter(U*D*Uc')
49002 
49003 This specialized function performs rank-1 update, i.e.:
49004 * S is a tHeight*A matrix, with A<=4
49005 * U is a uHeight*1 matrix with unit stride
49006 * Uc' is a 1*B matrix, with B<=A
49007 * scatter() scatters rows and columns of U*Uc'
49008 
49009 Return value:
49010 * True if update was applied
49011 * False if kernel refused to perform an update (quick exit for unsupported
49012   combinations of input sizes)
49013 
49014   -- ALGLIB routine --
49015      20.09.2020
49016      Bochkanov Sergey
49017 *************************************************************************/
spchol_updatekernelrank1(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t trowstride,ae_int_t offsu,ae_int_t uheight,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)49018 static ae_bool spchol_updatekernelrank1(/* Real    */ ae_vector* rowstorage,
49019      ae_int_t offss,
49020      ae_int_t twidth,
49021      ae_int_t trowstride,
49022      ae_int_t offsu,
49023      ae_int_t uheight,
49024      ae_int_t uwidth,
49025      /* Real    */ ae_vector* diagd,
49026      ae_int_t offsd,
49027      /* Integer */ ae_vector* raw2smap,
49028      /* Integer */ ae_vector* superrowidx,
49029      ae_int_t urbase,
49030      ae_state *_state)
49031 {
49032     ae_int_t k;
49033     ae_int_t targetrow;
49034     double d0;
49035     double u00;
49036     double u10;
49037     double u20;
49038     double u30;
49039     double uk;
49040     ae_int_t col0;
49041     ae_int_t col1;
49042     ae_int_t col2;
49043     ae_int_t col3;
49044     ae_bool result;
49045 
49046 
49047 
49048     /*
49049      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
49050      */
49051     result = ae_false;
49052     if( twidth>4 )
49053     {
49054         return result;
49055     }
49056     if( uwidth>4 )
49057     {
49058         return result;
49059     }
49060 
49061     /*
49062      * Determine target columns, load update matrix
49063      */
49064     d0 = diagd->ptr.p_double[offsd];
49065     col0 = 0;
49066     col1 = 0;
49067     col2 = 0;
49068     col3 = 0;
49069     u00 = (double)(0);
49070     u10 = (double)(0);
49071     u20 = (double)(0);
49072     u30 = (double)(0);
49073     if( uwidth>=1 )
49074     {
49075         col0 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+0]];
49076         u00 = d0*rowstorage->ptr.p_double[offsu+0];
49077     }
49078     if( uwidth>=2 )
49079     {
49080         col1 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+1]];
49081         u10 = d0*rowstorage->ptr.p_double[offsu+1];
49082     }
49083     if( uwidth>=3 )
49084     {
49085         col2 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+2]];
49086         u20 = d0*rowstorage->ptr.p_double[offsu+2];
49087     }
49088     if( uwidth>=4 )
49089     {
49090         col3 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+3]];
49091         u30 = d0*rowstorage->ptr.p_double[offsu+3];
49092     }
49093 
49094     /*
49095      * Run update
49096      */
49097     if( uwidth==1 )
49098     {
49099         for(k=0; k<=uheight-1; k++)
49100         {
49101             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49102             uk = rowstorage->ptr.p_double[offsu+k];
49103             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49104         }
49105     }
49106     if( uwidth==2 )
49107     {
49108         for(k=0; k<=uheight-1; k++)
49109         {
49110             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49111             uk = rowstorage->ptr.p_double[offsu+k];
49112             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49113             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk;
49114         }
49115     }
49116     if( uwidth==3 )
49117     {
49118         for(k=0; k<=uheight-1; k++)
49119         {
49120             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49121             uk = rowstorage->ptr.p_double[offsu+k];
49122             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49123             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk;
49124             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk;
49125         }
49126     }
49127     if( uwidth==4 )
49128     {
49129         for(k=0; k<=uheight-1; k++)
49130         {
49131             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49132             uk = rowstorage->ptr.p_double[offsu+k];
49133             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk;
49134             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk;
49135             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk;
49136             rowstorage->ptr.p_double[targetrow+col3] = rowstorage->ptr.p_double[targetrow+col3]-u30*uk;
49137         }
49138     }
49139     result = ae_true;
49140     return result;
49141 }
49142 
49143 
49144 /*************************************************************************
49145 Fast kernels for small supernodal updates: special rank-2 function.
49146 
49147 ! See comments on UpdateSupernode() for information  on generic supernodal
49148 ! updates, including notation used below.
49149 
49150 The generic update has following form:
49151 
49152     S := S - scatter(U*D*Uc')
49153 
49154 This specialized function performs rank-2 update, i.e.:
49155 * S is a tHeight*A matrix, with A<=4
49156 * U is a uHeight*2 matrix with row stride equal to 2
49157 * Uc' is a 2*B matrix, with B<=A
* scatter() scatters rows and columns of U*Uc'
49159 
49160 Return value:
49161 * True if update was applied
49162 * False if kernel refused to perform an update (quick exit for unsupported
49163   combinations of input sizes)
49164 
49165   -- ALGLIB routine --
49166      20.09.2020
49167      Bochkanov Sergey
49168 *************************************************************************/
spchol_updatekernelrank2(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t trowstride,ae_int_t offsu,ae_int_t uheight,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)49169 static ae_bool spchol_updatekernelrank2(/* Real    */ ae_vector* rowstorage,
49170      ae_int_t offss,
49171      ae_int_t twidth,
49172      ae_int_t trowstride,
49173      ae_int_t offsu,
49174      ae_int_t uheight,
49175      ae_int_t uwidth,
49176      /* Real    */ ae_vector* diagd,
49177      ae_int_t offsd,
49178      /* Integer */ ae_vector* raw2smap,
49179      /* Integer */ ae_vector* superrowidx,
49180      ae_int_t urbase,
49181      ae_state *_state)
49182 {
49183     ae_int_t k;
49184     ae_int_t targetrow;
49185     double d0;
49186     double d1;
49187     double u00;
49188     double u10;
49189     double u20;
49190     double u30;
49191     double u01;
49192     double u11;
49193     double u21;
49194     double u31;
49195     double uk0;
49196     double uk1;
49197     ae_int_t col0;
49198     ae_int_t col1;
49199     ae_int_t col2;
49200     ae_int_t col3;
49201     ae_bool result;
49202 
49203 
49204 
49205     /*
49206      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
49207      */
49208     result = ae_false;
49209     if( twidth>4 )
49210     {
49211         return result;
49212     }
49213     if( uwidth>4 )
49214     {
49215         return result;
49216     }
49217 
49218     /*
49219      * Determine target columns, load update matrix
49220      */
49221     d0 = diagd->ptr.p_double[offsd];
49222     d1 = diagd->ptr.p_double[offsd+1];
49223     col0 = 0;
49224     col1 = 0;
49225     col2 = 0;
49226     col3 = 0;
49227     u00 = (double)(0);
49228     u01 = (double)(0);
49229     u10 = (double)(0);
49230     u11 = (double)(0);
49231     u20 = (double)(0);
49232     u21 = (double)(0);
49233     u30 = (double)(0);
49234     u31 = (double)(0);
49235     if( uwidth>=1 )
49236     {
49237         col0 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+0]];
49238         u00 = d0*rowstorage->ptr.p_double[offsu+0];
49239         u01 = d1*rowstorage->ptr.p_double[offsu+1];
49240     }
49241     if( uwidth>=2 )
49242     {
49243         col1 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+1]];
49244         u10 = d0*rowstorage->ptr.p_double[offsu+1*2+0];
49245         u11 = d1*rowstorage->ptr.p_double[offsu+1*2+1];
49246     }
49247     if( uwidth>=3 )
49248     {
49249         col2 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+2]];
49250         u20 = d0*rowstorage->ptr.p_double[offsu+2*2+0];
49251         u21 = d1*rowstorage->ptr.p_double[offsu+2*2+1];
49252     }
49253     if( uwidth>=4 )
49254     {
49255         col3 = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+3]];
49256         u30 = d0*rowstorage->ptr.p_double[offsu+3*2+0];
49257         u31 = d1*rowstorage->ptr.p_double[offsu+3*2+1];
49258     }
49259 
49260     /*
49261      * Run update
49262      */
49263     if( uwidth==1 )
49264     {
49265         for(k=0; k<=uheight-1; k++)
49266         {
49267             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49268             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49269             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49270             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49271         }
49272     }
49273     if( uwidth==2 )
49274     {
49275         for(k=0; k<=uheight-1; k++)
49276         {
49277             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49278             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49279             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49280             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49281             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk0-u11*uk1;
49282         }
49283     }
49284     if( uwidth==3 )
49285     {
49286         for(k=0; k<=uheight-1; k++)
49287         {
49288             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49289             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49290             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49291             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49292             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk0-u11*uk1;
49293             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk0-u21*uk1;
49294         }
49295     }
49296     if( uwidth==4 )
49297     {
49298         for(k=0; k<=uheight-1; k++)
49299         {
49300             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*trowstride;
49301             uk0 = rowstorage->ptr.p_double[offsu+2*k+0];
49302             uk1 = rowstorage->ptr.p_double[offsu+2*k+1];
49303             rowstorage->ptr.p_double[targetrow+col0] = rowstorage->ptr.p_double[targetrow+col0]-u00*uk0-u01*uk1;
49304             rowstorage->ptr.p_double[targetrow+col1] = rowstorage->ptr.p_double[targetrow+col1]-u10*uk0-u11*uk1;
49305             rowstorage->ptr.p_double[targetrow+col2] = rowstorage->ptr.p_double[targetrow+col2]-u20*uk0-u21*uk1;
49306             rowstorage->ptr.p_double[targetrow+col3] = rowstorage->ptr.p_double[targetrow+col3]-u30*uk0-u31*uk1;
49307         }
49308     }
49309     result = ae_true;
49310     return result;
49311 }
49312 
49313 
49314 /*************************************************************************
49315 Debug checks for sparsity structure
49316 
49317   -- ALGLIB routine --
49318      22.08.2021
49319      Bochkanov Sergey
49320 *************************************************************************/
spchol_slowdebugchecks(sparsematrix * a,ae_vector * fillinperm,ae_int_t n,ae_int_t tail,sparsematrix * referencetaila,ae_state * _state)49321 static void spchol_slowdebugchecks(sparsematrix* a,
49322      /* Integer */ ae_vector* fillinperm,
49323      ae_int_t n,
49324      ae_int_t tail,
49325      sparsematrix* referencetaila,
49326      ae_state *_state)
49327 {
49328     ae_frame _frame_block;
49329     ae_int_t i;
49330     ae_int_t j;
49331     sparsematrix perma;
49332     ae_matrix densea;
49333 
49334     ae_frame_make(_state, &_frame_block);
49335     memset(&perma, 0, sizeof(perma));
49336     memset(&densea, 0, sizeof(densea));
49337     _sparsematrix_init(&perma, _state, ae_true);
49338     ae_matrix_init(&densea, 0, 0, DT_REAL, _state, ae_true);
49339 
49340     sparsesymmpermtblbuf(a, ae_false, fillinperm, &perma, _state);
49341     ae_matrix_set_length(&densea, n, n, _state);
49342     for(i=0; i<=n-1; i++)
49343     {
49344         for(j=0; j<=i; j++)
49345         {
49346             if( !sparseexists(&perma, i, j, _state) )
49347             {
49348                 densea.ptr.pp_double[i][j] = (double)(0);
49349                 continue;
49350             }
49351             if( i==j )
49352             {
49353                 densea.ptr.pp_double[i][j] = (double)(1);
49354             }
49355             else
49356             {
49357                 densea.ptr.pp_double[i][j] = 0.01*(ae_cos((double)(i+1), _state)+1.23*ae_sin((double)(j+1), _state))/n;
49358             }
49359         }
49360     }
49361     ae_assert(spchol_dbgmatrixcholesky2(&densea, 0, n-tail, ae_false, _state), "densechol failed", _state);
49362     rmatrixrighttrsm(tail, n-tail, &densea, 0, 0, ae_false, ae_false, 1, &densea, n-tail, 0, _state);
49363     rmatrixsyrk(tail, n-tail, -1.0, &densea, n-tail, 0, 0, 1.0, &densea, n-tail, n-tail, ae_false, _state);
49364     for(i=n-tail; i<=n-1; i++)
49365     {
49366         for(j=n-tail; j<=i; j++)
49367         {
49368             ae_assert(!(ae_fp_eq(densea.ptr.pp_double[i][j],(double)(0))&&sparseexists(referencetaila, i-(n-tail), j-(n-tail), _state)), "SPSymmAnalyze: structure check 1 failed", _state);
49369             ae_assert(!(ae_fp_neq(densea.ptr.pp_double[i][j],(double)(0))&&!sparseexists(referencetaila, i-(n-tail), j-(n-tail), _state)), "SPSymmAnalyze: structure check 2 failed", _state);
49370         }
49371     }
49372     ae_frame_leave(_state);
49373 }
49374 
49375 
49376 /*************************************************************************
49377 Dense Cholesky driver for internal integrity checks
49378 
49379   -- ALGLIB routine --
49380      22.08.2021
49381      Bochkanov Sergey
49382 *************************************************************************/
spchol_dbgmatrixcholesky2(ae_matrix * aaa,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_state * _state)49383 static ae_bool spchol_dbgmatrixcholesky2(/* Real    */ ae_matrix* aaa,
49384      ae_int_t offs,
49385      ae_int_t n,
49386      ae_bool isupper,
49387      ae_state *_state)
49388 {
49389     ae_frame _frame_block;
49390     ae_int_t i;
49391     ae_int_t j;
49392     double ajj;
49393     double v;
49394     double r;
49395     ae_vector tmp;
49396     ae_bool result;
49397 
49398     ae_frame_make(_state, &_frame_block);
49399     memset(&tmp, 0, sizeof(tmp));
49400     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
49401 
49402     ae_vector_set_length(&tmp, 2*n, _state);
49403     result = ae_true;
49404     if( n<0 )
49405     {
49406         result = ae_false;
49407         ae_frame_leave(_state);
49408         return result;
49409     }
49410 
49411     /*
49412      * Quick return if possible
49413      */
49414     if( n==0 )
49415     {
49416         ae_frame_leave(_state);
49417         return result;
49418     }
49419     if( isupper )
49420     {
49421 
49422         /*
49423          * Compute the Cholesky factorization A = U'*U.
49424          */
49425         for(j=0; j<=n-1; j++)
49426         {
49427 
49428             /*
49429              * Compute U(J,J) and test for non-positive-definiteness.
49430              */
49431             v = ae_v_dotproduct(&aaa->ptr.pp_double[offs][offs+j], aaa->stride, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(offs,offs+j-1));
49432             ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
49433             if( ae_fp_less_eq(ajj,(double)(0)) )
49434             {
49435                 aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49436                 result = ae_false;
49437                 ae_frame_leave(_state);
49438                 return result;
49439             }
49440             ajj = ae_sqrt(ajj, _state);
49441             aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49442 
49443             /*
49444              * Compute elements J+1:N-1 of row J.
49445              */
49446             if( j<n-1 )
49447             {
49448                 if( j>0 )
49449                 {
49450                     ae_v_moveneg(&tmp.ptr.p_double[0], 1, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(0,j-1));
49451                     rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, &tmp, 0, &tmp, n, _state);
49452                     ae_v_add(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, &tmp.ptr.p_double[n], 1, ae_v_len(offs+j+1,offs+n-1));
49453                 }
49454                 r = 1/ajj;
49455                 ae_v_muld(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
49456             }
49457         }
49458     }
49459     else
49460     {
49461 
49462         /*
49463          * Compute the Cholesky factorization A = L*L'.
49464          */
49465         for(j=0; j<=n-1; j++)
49466         {
49467 
49468             /*
49469              * Compute L(J+1,J+1) and test for non-positive-definiteness.
49470              */
49471             v = ae_v_dotproduct(&aaa->ptr.pp_double[offs+j][offs], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(offs,offs+j-1));
49472             ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
49473             if( ae_fp_less_eq(ajj,(double)(0)) )
49474             {
49475                 aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49476                 result = ae_false;
49477                 ae_frame_leave(_state);
49478                 return result;
49479             }
49480             ajj = ae_sqrt(ajj, _state);
49481             aaa->ptr.pp_double[offs+j][offs+j] = ajj;
49482 
49483             /*
49484              * Compute elements J+1:N of column J.
49485              */
49486             if( j<n-1 )
49487             {
49488                 r = 1/ajj;
49489                 if( j>0 )
49490                 {
49491                     ae_v_move(&tmp.ptr.p_double[0], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(0,j-1));
49492                     rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, &tmp, 0, &tmp, n, _state);
49493                     for(i=0; i<=n-j-2; i++)
49494                     {
49495                         aaa->ptr.pp_double[offs+j+1+i][offs+j] = (aaa->ptr.pp_double[offs+j+1+i][offs+j]-tmp.ptr.p_double[n+i])*r;
49496                     }
49497                 }
49498                 else
49499                 {
49500                     for(i=0; i<=n-j-2; i++)
49501                     {
49502                         aaa->ptr.pp_double[offs+j+1+i][offs+j] = aaa->ptr.pp_double[offs+j+1+i][offs+j]*r;
49503                     }
49504                 }
49505             }
49506         }
49507     }
49508     ae_frame_leave(_state);
49509     return result;
49510 }
49511 
49512 
_spcholanalysis_init(void * _p,ae_state * _state,ae_bool make_automatic)49513 void _spcholanalysis_init(void* _p, ae_state *_state, ae_bool make_automatic)
49514 {
49515     spcholanalysis *p = (spcholanalysis*)_p;
49516     ae_touch_ptr((void*)p);
49517     ae_vector_init(&p->parentsupernode, 0, DT_INT, _state, make_automatic);
49518     ae_vector_init(&p->supercolrange, 0, DT_INT, _state, make_automatic);
49519     ae_vector_init(&p->superrowridx, 0, DT_INT, _state, make_automatic);
49520     ae_vector_init(&p->superrowidx, 0, DT_INT, _state, make_automatic);
49521     ae_vector_init(&p->fillinperm, 0, DT_INT, _state, make_automatic);
49522     ae_vector_init(&p->invfillinperm, 0, DT_INT, _state, make_automatic);
49523     ae_vector_init(&p->superperm, 0, DT_INT, _state, make_automatic);
49524     ae_vector_init(&p->invsuperperm, 0, DT_INT, _state, make_automatic);
49525     ae_vector_init(&p->effectiveperm, 0, DT_INT, _state, make_automatic);
49526     ae_vector_init(&p->inveffectiveperm, 0, DT_INT, _state, make_automatic);
49527     ae_vector_init(&p->ladjplusr, 0, DT_INT, _state, make_automatic);
49528     ae_vector_init(&p->ladjplus, 0, DT_INT, _state, make_automatic);
49529     ae_vector_init(&p->outrowcounts, 0, DT_INT, _state, make_automatic);
49530     ae_vector_init(&p->inputstorage, 0, DT_REAL, _state, make_automatic);
49531     ae_vector_init(&p->outputstorage, 0, DT_REAL, _state, make_automatic);
49532     ae_vector_init(&p->rowstrides, 0, DT_INT, _state, make_automatic);
49533     ae_vector_init(&p->rowoffsets, 0, DT_INT, _state, make_automatic);
49534     ae_vector_init(&p->diagd, 0, DT_REAL, _state, make_automatic);
49535     ae_vector_init(&p->wrkrows, 0, DT_INT, _state, make_automatic);
49536     ae_vector_init(&p->flagarray, 0, DT_BOOL, _state, make_automatic);
49537     ae_vector_init(&p->tmpparent, 0, DT_INT, _state, make_automatic);
49538     ae_vector_init(&p->node2supernode, 0, DT_INT, _state, make_automatic);
49539     ae_vector_init(&p->u2smap, 0, DT_INT, _state, make_automatic);
49540     ae_vector_init(&p->raw2smap, 0, DT_INT, _state, make_automatic);
49541     _amdbuffer_init(&p->amdtmp, _state, make_automatic);
49542     ae_vector_init(&p->tmp0, 0, DT_INT, _state, make_automatic);
49543     ae_vector_init(&p->tmp1, 0, DT_INT, _state, make_automatic);
49544     ae_vector_init(&p->tmp2, 0, DT_INT, _state, make_automatic);
49545     ae_vector_init(&p->tmp3, 0, DT_INT, _state, make_automatic);
49546     ae_vector_init(&p->tmp4, 0, DT_INT, _state, make_automatic);
49547     _sparsematrix_init(&p->tmpa, _state, make_automatic);
49548     _sparsematrix_init(&p->tmpat, _state, make_automatic);
49549     _sparsematrix_init(&p->tmpa2, _state, make_automatic);
49550     _sparsematrix_init(&p->tmpbottomt, _state, make_automatic);
49551     _sparsematrix_init(&p->tmpupdate, _state, make_automatic);
49552     _sparsematrix_init(&p->tmpupdatet, _state, make_automatic);
49553     _sparsematrix_init(&p->tmpnewtailt, _state, make_automatic);
49554     ae_vector_init(&p->tmpperm, 0, DT_INT, _state, make_automatic);
49555     ae_vector_init(&p->invtmpperm, 0, DT_INT, _state, make_automatic);
49556     ae_vector_init(&p->tmpx, 0, DT_REAL, _state, make_automatic);
49557     ae_vector_init(&p->simdbuf, 0, DT_REAL, _state, make_automatic);
49558 }
49559 
49560 
_spcholanalysis_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)49561 void _spcholanalysis_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
49562 {
49563     spcholanalysis *dst = (spcholanalysis*)_dst;
49564     spcholanalysis *src = (spcholanalysis*)_src;
49565     dst->tasktype = src->tasktype;
49566     dst->n = src->n;
49567     dst->permtype = src->permtype;
49568     dst->unitd = src->unitd;
49569     dst->modtype = src->modtype;
49570     dst->modparam0 = src->modparam0;
49571     dst->modparam1 = src->modparam1;
49572     dst->modparam2 = src->modparam2;
49573     dst->modparam3 = src->modparam3;
49574     dst->extendeddebug = src->extendeddebug;
49575     dst->dotrace = src->dotrace;
49576     dst->dotracesupernodalstructure = src->dotracesupernodalstructure;
49577     dst->nsuper = src->nsuper;
49578     ae_vector_init_copy(&dst->parentsupernode, &src->parentsupernode, _state, make_automatic);
49579     ae_vector_init_copy(&dst->supercolrange, &src->supercolrange, _state, make_automatic);
49580     ae_vector_init_copy(&dst->superrowridx, &src->superrowridx, _state, make_automatic);
49581     ae_vector_init_copy(&dst->superrowidx, &src->superrowidx, _state, make_automatic);
49582     ae_vector_init_copy(&dst->fillinperm, &src->fillinperm, _state, make_automatic);
49583     ae_vector_init_copy(&dst->invfillinperm, &src->invfillinperm, _state, make_automatic);
49584     ae_vector_init_copy(&dst->superperm, &src->superperm, _state, make_automatic);
49585     ae_vector_init_copy(&dst->invsuperperm, &src->invsuperperm, _state, make_automatic);
49586     ae_vector_init_copy(&dst->effectiveperm, &src->effectiveperm, _state, make_automatic);
49587     ae_vector_init_copy(&dst->inveffectiveperm, &src->inveffectiveperm, _state, make_automatic);
49588     dst->istopologicalordering = src->istopologicalordering;
49589     dst->applypermutationtooutput = src->applypermutationtooutput;
49590     ae_vector_init_copy(&dst->ladjplusr, &src->ladjplusr, _state, make_automatic);
49591     ae_vector_init_copy(&dst->ladjplus, &src->ladjplus, _state, make_automatic);
49592     ae_vector_init_copy(&dst->outrowcounts, &src->outrowcounts, _state, make_automatic);
49593     ae_vector_init_copy(&dst->inputstorage, &src->inputstorage, _state, make_automatic);
49594     ae_vector_init_copy(&dst->outputstorage, &src->outputstorage, _state, make_automatic);
49595     ae_vector_init_copy(&dst->rowstrides, &src->rowstrides, _state, make_automatic);
49596     ae_vector_init_copy(&dst->rowoffsets, &src->rowoffsets, _state, make_automatic);
49597     ae_vector_init_copy(&dst->diagd, &src->diagd, _state, make_automatic);
49598     ae_vector_init_copy(&dst->wrkrows, &src->wrkrows, _state, make_automatic);
49599     ae_vector_init_copy(&dst->flagarray, &src->flagarray, _state, make_automatic);
49600     ae_vector_init_copy(&dst->tmpparent, &src->tmpparent, _state, make_automatic);
49601     ae_vector_init_copy(&dst->node2supernode, &src->node2supernode, _state, make_automatic);
49602     ae_vector_init_copy(&dst->u2smap, &src->u2smap, _state, make_automatic);
49603     ae_vector_init_copy(&dst->raw2smap, &src->raw2smap, _state, make_automatic);
49604     _amdbuffer_init_copy(&dst->amdtmp, &src->amdtmp, _state, make_automatic);
49605     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
49606     ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic);
49607     ae_vector_init_copy(&dst->tmp2, &src->tmp2, _state, make_automatic);
49608     ae_vector_init_copy(&dst->tmp3, &src->tmp3, _state, make_automatic);
49609     ae_vector_init_copy(&dst->tmp4, &src->tmp4, _state, make_automatic);
49610     _sparsematrix_init_copy(&dst->tmpa, &src->tmpa, _state, make_automatic);
49611     _sparsematrix_init_copy(&dst->tmpat, &src->tmpat, _state, make_automatic);
49612     _sparsematrix_init_copy(&dst->tmpa2, &src->tmpa2, _state, make_automatic);
49613     _sparsematrix_init_copy(&dst->tmpbottomt, &src->tmpbottomt, _state, make_automatic);
49614     _sparsematrix_init_copy(&dst->tmpupdate, &src->tmpupdate, _state, make_automatic);
49615     _sparsematrix_init_copy(&dst->tmpupdatet, &src->tmpupdatet, _state, make_automatic);
49616     _sparsematrix_init_copy(&dst->tmpnewtailt, &src->tmpnewtailt, _state, make_automatic);
49617     ae_vector_init_copy(&dst->tmpperm, &src->tmpperm, _state, make_automatic);
49618     ae_vector_init_copy(&dst->invtmpperm, &src->invtmpperm, _state, make_automatic);
49619     ae_vector_init_copy(&dst->tmpx, &src->tmpx, _state, make_automatic);
49620     ae_vector_init_copy(&dst->simdbuf, &src->simdbuf, _state, make_automatic);
49621 }
49622 
49623 
_spcholanalysis_clear(void * _p)49624 void _spcholanalysis_clear(void* _p)
49625 {
49626     spcholanalysis *p = (spcholanalysis*)_p;
49627     ae_touch_ptr((void*)p);
49628     ae_vector_clear(&p->parentsupernode);
49629     ae_vector_clear(&p->supercolrange);
49630     ae_vector_clear(&p->superrowridx);
49631     ae_vector_clear(&p->superrowidx);
49632     ae_vector_clear(&p->fillinperm);
49633     ae_vector_clear(&p->invfillinperm);
49634     ae_vector_clear(&p->superperm);
49635     ae_vector_clear(&p->invsuperperm);
49636     ae_vector_clear(&p->effectiveperm);
49637     ae_vector_clear(&p->inveffectiveperm);
49638     ae_vector_clear(&p->ladjplusr);
49639     ae_vector_clear(&p->ladjplus);
49640     ae_vector_clear(&p->outrowcounts);
49641     ae_vector_clear(&p->inputstorage);
49642     ae_vector_clear(&p->outputstorage);
49643     ae_vector_clear(&p->rowstrides);
49644     ae_vector_clear(&p->rowoffsets);
49645     ae_vector_clear(&p->diagd);
49646     ae_vector_clear(&p->wrkrows);
49647     ae_vector_clear(&p->flagarray);
49648     ae_vector_clear(&p->tmpparent);
49649     ae_vector_clear(&p->node2supernode);
49650     ae_vector_clear(&p->u2smap);
49651     ae_vector_clear(&p->raw2smap);
49652     _amdbuffer_clear(&p->amdtmp);
49653     ae_vector_clear(&p->tmp0);
49654     ae_vector_clear(&p->tmp1);
49655     ae_vector_clear(&p->tmp2);
49656     ae_vector_clear(&p->tmp3);
49657     ae_vector_clear(&p->tmp4);
49658     _sparsematrix_clear(&p->tmpa);
49659     _sparsematrix_clear(&p->tmpat);
49660     _sparsematrix_clear(&p->tmpa2);
49661     _sparsematrix_clear(&p->tmpbottomt);
49662     _sparsematrix_clear(&p->tmpupdate);
49663     _sparsematrix_clear(&p->tmpupdatet);
49664     _sparsematrix_clear(&p->tmpnewtailt);
49665     ae_vector_clear(&p->tmpperm);
49666     ae_vector_clear(&p->invtmpperm);
49667     ae_vector_clear(&p->tmpx);
49668     ae_vector_clear(&p->simdbuf);
49669 }
49670 
49671 
_spcholanalysis_destroy(void * _p)49672 void _spcholanalysis_destroy(void* _p)
49673 {
49674     spcholanalysis *p = (spcholanalysis*)_p;
49675     ae_touch_ptr((void*)p);
49676     ae_vector_destroy(&p->parentsupernode);
49677     ae_vector_destroy(&p->supercolrange);
49678     ae_vector_destroy(&p->superrowridx);
49679     ae_vector_destroy(&p->superrowidx);
49680     ae_vector_destroy(&p->fillinperm);
49681     ae_vector_destroy(&p->invfillinperm);
49682     ae_vector_destroy(&p->superperm);
49683     ae_vector_destroy(&p->invsuperperm);
49684     ae_vector_destroy(&p->effectiveperm);
49685     ae_vector_destroy(&p->inveffectiveperm);
49686     ae_vector_destroy(&p->ladjplusr);
49687     ae_vector_destroy(&p->ladjplus);
49688     ae_vector_destroy(&p->outrowcounts);
49689     ae_vector_destroy(&p->inputstorage);
49690     ae_vector_destroy(&p->outputstorage);
49691     ae_vector_destroy(&p->rowstrides);
49692     ae_vector_destroy(&p->rowoffsets);
49693     ae_vector_destroy(&p->diagd);
49694     ae_vector_destroy(&p->wrkrows);
49695     ae_vector_destroy(&p->flagarray);
49696     ae_vector_destroy(&p->tmpparent);
49697     ae_vector_destroy(&p->node2supernode);
49698     ae_vector_destroy(&p->u2smap);
49699     ae_vector_destroy(&p->raw2smap);
49700     _amdbuffer_destroy(&p->amdtmp);
49701     ae_vector_destroy(&p->tmp0);
49702     ae_vector_destroy(&p->tmp1);
49703     ae_vector_destroy(&p->tmp2);
49704     ae_vector_destroy(&p->tmp3);
49705     ae_vector_destroy(&p->tmp4);
49706     _sparsematrix_destroy(&p->tmpa);
49707     _sparsematrix_destroy(&p->tmpat);
49708     _sparsematrix_destroy(&p->tmpa2);
49709     _sparsematrix_destroy(&p->tmpbottomt);
49710     _sparsematrix_destroy(&p->tmpupdate);
49711     _sparsematrix_destroy(&p->tmpupdatet);
49712     _sparsematrix_destroy(&p->tmpnewtailt);
49713     ae_vector_destroy(&p->tmpperm);
49714     ae_vector_destroy(&p->invtmpperm);
49715     ae_vector_destroy(&p->tmpx);
49716     ae_vector_destroy(&p->simdbuf);
49717 }
49718 
49719 
49720 #endif
49721 #if defined(AE_COMPILE_TRFAC) || !defined(AE_PARTIAL_BUILD)
49722 
49723 
49724 /*************************************************************************
49725 LU decomposition of a general real matrix with row pivoting
49726 
49727 A is represented as A = P*L*U, where:
49728 * L is lower unitriangular matrix
49729 * U is upper triangular matrix
49730 * P = P0*P1*...*PK, K=min(M,N)-1,
49731   Pi - permutation matrix for I and Pivots[I]
49732 
49733 INPUT PARAMETERS:
49734     A       -   array[0..M-1, 0..N-1].
49735     M       -   number of rows in matrix A.
49736     N       -   number of columns in matrix A.
49737 
49738 
49739 OUTPUT PARAMETERS:
49740     A       -   matrices L and U in compact form:
49741                 * L is stored under main diagonal
49742                 * U is stored on and above main diagonal
49743     Pivots  -   permutation matrix in compact form.
49744                 array[0..Min(M-1,N-1)].
49745 
49746   ! FREE EDITION OF ALGLIB:
49747   !
49748   ! Free Edition of ALGLIB supports following important features for  this
49749   ! function:
49750   ! * C++ version: x64 SIMD support using C++ intrinsics
49751   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49752   !
49753   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49754   ! Reference Manual in order  to  find  out  how to activate SIMD support
49755   ! in ALGLIB.
49756 
49757   ! COMMERCIAL EDITION OF ALGLIB:
49758   !
49759   ! Commercial Edition of ALGLIB includes following important improvements
49760   ! of this function:
49761   ! * high-performance native backend with same C# interface (C# version)
49762   ! * multithreading support (C++ and C# versions)
49763   ! * hardware vendor (Intel) implementations of linear algebra primitives
49764   !   (C++ and C# versions, x86/x64 platform)
49765   !
49766   ! We recommend you to read 'Working with commercial version' section  of
49767   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49768   ! related features provided by commercial edition of ALGLIB.
49769 
49770   -- ALGLIB routine --
49771      10.01.2010
49772      Bochkanov Sergey
49773 *************************************************************************/
rmatrixlu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)49774 void rmatrixlu(/* Real    */ ae_matrix* a,
49775      ae_int_t m,
49776      ae_int_t n,
49777      /* Integer */ ae_vector* pivots,
49778      ae_state *_state)
49779 {
49780 
49781     ae_vector_clear(pivots);
49782 
49783     ae_assert(m>0, "RMatrixLU: incorrect M!", _state);
49784     ae_assert(n>0, "RMatrixLU: incorrect N!", _state);
49785     rmatrixplu(a, m, n, pivots, _state);
49786 }
49787 
49788 
49789 /*************************************************************************
49790 LU decomposition of a general complex matrix with row pivoting
49791 
49792 A is represented as A = P*L*U, where:
49793 * L is lower unitriangular matrix
49794 * U is upper triangular matrix
49795 * P = P0*P1*...*PK, K=min(M,N)-1,
49796   Pi - permutation matrix for I and Pivots[I]
49797 
49798 INPUT PARAMETERS:
49799     A       -   array[0..M-1, 0..N-1].
49800     M       -   number of rows in matrix A.
49801     N       -   number of columns in matrix A.
49802 
49803 
49804 OUTPUT PARAMETERS:
49805     A       -   matrices L and U in compact form:
49806                 * L is stored under main diagonal
49807                 * U is stored on and above main diagonal
49808     Pivots  -   permutation matrix in compact form.
49809                 array[0..Min(M-1,N-1)].
49810 
49811   ! FREE EDITION OF ALGLIB:
49812   !
49813   ! Free Edition of ALGLIB supports following important features for  this
49814   ! function:
49815   ! * C++ version: x64 SIMD support using C++ intrinsics
49816   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49817   !
49818   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49819   ! Reference Manual in order  to  find  out  how to activate SIMD support
49820   ! in ALGLIB.
49821 
49822   ! COMMERCIAL EDITION OF ALGLIB:
49823   !
49824   ! Commercial Edition of ALGLIB includes following important improvements
49825   ! of this function:
49826   ! * high-performance native backend with same C# interface (C# version)
49827   ! * multithreading support (C++ and C# versions)
49828   ! * hardware vendor (Intel) implementations of linear algebra primitives
49829   !   (C++ and C# versions, x86/x64 platform)
49830   !
49831   ! We recommend you to read 'Working with commercial version' section  of
49832   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49833   ! related features provided by commercial edition of ALGLIB.
49834 
49835   -- ALGLIB routine --
49836      10.01.2010
49837      Bochkanov Sergey
49838 *************************************************************************/
cmatrixlu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)49839 void cmatrixlu(/* Complex */ ae_matrix* a,
49840      ae_int_t m,
49841      ae_int_t n,
49842      /* Integer */ ae_vector* pivots,
49843      ae_state *_state)
49844 {
49845 
49846     ae_vector_clear(pivots);
49847 
49848     ae_assert(m>0, "CMatrixLU: incorrect M!", _state);
49849     ae_assert(n>0, "CMatrixLU: incorrect N!", _state);
49850     cmatrixplu(a, m, n, pivots, _state);
49851 }
49852 
49853 
49854 /*************************************************************************
49855 Cache-oblivious Cholesky decomposition
49856 
49857 The algorithm computes Cholesky decomposition  of  a  Hermitian  positive-
49858 definite matrix. The result of an algorithm is a representation  of  A  as
49859 A=U'*U  or A=L*L' (here X' denotes conj(X^T)).
49860 
49861 INPUT PARAMETERS:
49862     A       -   upper or lower triangle of a factorized matrix.
49863                 array with elements [0..N-1, 0..N-1].
49864     N       -   size of matrix A.
49865     IsUpper -   if IsUpper=True, then A contains an upper triangle of
49866                 a symmetric matrix, otherwise A contains a lower one.
49867 
49868 OUTPUT PARAMETERS:
49869     A       -   the result of factorization. If IsUpper=True, then
49870                 the upper triangle contains matrix U, so that A = U'*U,
49871                 and the elements below the main diagonal are not modified.
49872                 Similarly, if IsUpper = False.
49873 
49874 RESULT:
49875     If  the  matrix  is  positive-definite,  the  function  returns  True.
49876     Otherwise, the function returns False. Contents of A is not determined
49877     in such case.
49878 
49879   ! FREE EDITION OF ALGLIB:
49880   !
49881   ! Free Edition of ALGLIB supports following important features for  this
49882   ! function:
49883   ! * C++ version: x64 SIMD support using C++ intrinsics
49884   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49885   !
49886   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49887   ! Reference Manual in order  to  find  out  how to activate SIMD support
49888   ! in ALGLIB.
49889 
49890   ! COMMERCIAL EDITION OF ALGLIB:
49891   !
49892   ! Commercial Edition of ALGLIB includes following important improvements
49893   ! of this function:
49894   ! * high-performance native backend with same C# interface (C# version)
49895   ! * multithreading support (C++ and C# versions)
49896   ! * hardware vendor (Intel) implementations of linear algebra primitives
49897   !   (C++ and C# versions, x86/x64 platform)
49898   !
49899   ! We recommend you to read 'Working with commercial version' section  of
49900   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49901   ! related features provided by commercial edition of ALGLIB.
49902 
49903   -- ALGLIB routine --
49904      15.12.2009-22.01.2018
49905      Bochkanov Sergey
49906 *************************************************************************/
hpdmatrixcholesky(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)49907 ae_bool hpdmatrixcholesky(/* Complex */ ae_matrix* a,
49908      ae_int_t n,
49909      ae_bool isupper,
49910      ae_state *_state)
49911 {
49912     ae_frame _frame_block;
49913     ae_vector tmp;
49914     ae_bool result;
49915 
49916     ae_frame_make(_state, &_frame_block);
49917     memset(&tmp, 0, sizeof(tmp));
49918     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
49919 
49920     if( n<1 )
49921     {
49922         result = ae_false;
49923         ae_frame_leave(_state);
49924         return result;
49925     }
49926     result = trfac_hpdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state);
49927     ae_frame_leave(_state);
49928     return result;
49929 }
49930 
49931 
49932 /*************************************************************************
49933 Cache-oblivious Cholesky decomposition
49934 
49935 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
49936 definite matrix. The result of an algorithm is a representation  of  A  as
49937 A=U^T*U  or A=L*L^T
49938 
49939 INPUT PARAMETERS:
49940     A       -   upper or lower triangle of a factorized matrix.
49941                 array with elements [0..N-1, 0..N-1].
49942     N       -   size of matrix A.
49943     IsUpper -   if IsUpper=True, then A contains an upper triangle of
49944                 a symmetric matrix, otherwise A contains a lower one.
49945 
49946 OUTPUT PARAMETERS:
49947     A       -   the result of factorization. If IsUpper=True, then
49948                 the upper triangle contains matrix U, so that A = U^T*U,
49949                 and the elements below the main diagonal are not modified.
49950                 Similarly, if IsUpper = False.
49951 
49952 RESULT:
49953     If  the  matrix  is  positive-definite,  the  function  returns  True.
49954     Otherwise, the function returns False. Contents of A is not determined
49955     in such case.
49956 
49957   ! FREE EDITION OF ALGLIB:
49958   !
49959   ! Free Edition of ALGLIB supports following important features for  this
49960   ! function:
49961   ! * C++ version: x64 SIMD support using C++ intrinsics
49962   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
49963   !
49964   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
49965   ! Reference Manual in order  to  find  out  how to activate SIMD support
49966   ! in ALGLIB.
49967 
49968   ! COMMERCIAL EDITION OF ALGLIB:
49969   !
49970   ! Commercial Edition of ALGLIB includes following important improvements
49971   ! of this function:
49972   ! * high-performance native backend with same C# interface (C# version)
49973   ! * multithreading support (C++ and C# versions)
49974   ! * hardware vendor (Intel) implementations of linear algebra primitives
49975   !   (C++ and C# versions, x86/x64 platform)
49976   !
49977   ! We recommend you to read 'Working with commercial version' section  of
49978   ! ALGLIB Reference Manual in order to find out how to  use  performance-
49979   ! related features provided by commercial edition of ALGLIB.
49980 
49981   -- ALGLIB routine --
49982      15.12.2009
49983      Bochkanov Sergey
49984 *************************************************************************/
spdmatrixcholesky(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)49985 ae_bool spdmatrixcholesky(/* Real    */ ae_matrix* a,
49986      ae_int_t n,
49987      ae_bool isupper,
49988      ae_state *_state)
49989 {
49990     ae_frame _frame_block;
49991     ae_vector tmp;
49992     ae_bool result;
49993 
49994     ae_frame_make(_state, &_frame_block);
49995     memset(&tmp, 0, sizeof(tmp));
49996     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
49997 
49998     if( n<1 )
49999     {
50000         result = ae_false;
50001         ae_frame_leave(_state);
50002         return result;
50003     }
50004     result = spdmatrixcholeskyrec(a, 0, n, isupper, &tmp, _state);
50005     ae_frame_leave(_state);
50006     return result;
50007 }
50008 
50009 
50010 /*************************************************************************
50011 Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
50012 version which uses preallocated buffer which is saved  between  subsequent
50013 function calls.
50014 
50015 This function uses internally allocated buffer which is not saved  between
50016 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
50017 we  recommend   you   to   use   "buffered"   version   of  this function:
50018 SPDMatrixCholeskyUpdateAdd1Buf().
50019 
50020 INPUT PARAMETERS:
50021     A       -   upper or lower Cholesky factor.
50022                 array with elements [0..N-1, 0..N-1].
50023                 Exception is thrown if array size is too small.
50024     N       -   size of matrix A, N>0
50025     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50026                 otherwise A contains a lower one.
50027     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
50028                 Exception is thrown if array size is too small.
50029     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50030                 needed. It is recommended to  reuse  this  buffer  if  you
50031                 perform a lot of subsequent decompositions.
50032 
50033 OUTPUT PARAMETERS:
50034     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50035                 triangle contains matrix U, and the elements below the main
50036                 diagonal are not modified. Similarly, if IsUpper = False.
50037 
50038 NOTE: this function always succeeds, so it does not return completion code
50039 
50040 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
50041       for presence of infinities or NAN's.
50042 
50043   -- ALGLIB --
50044      03.02.2014
50045      Sergey Bochkanov
50046 *************************************************************************/
spdmatrixcholeskyupdateadd1(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * u,ae_state * _state)50047 void spdmatrixcholeskyupdateadd1(/* Real    */ ae_matrix* a,
50048      ae_int_t n,
50049      ae_bool isupper,
50050      /* Real    */ ae_vector* u,
50051      ae_state *_state)
50052 {
50053     ae_frame _frame_block;
50054     ae_vector bufr;
50055 
50056     ae_frame_make(_state, &_frame_block);
50057     memset(&bufr, 0, sizeof(bufr));
50058     ae_vector_init(&bufr, 0, DT_REAL, _state, ae_true);
50059 
50060     ae_assert(n>0, "SPDMatrixCholeskyUpdateAdd1: N<=0", _state);
50061     ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateAdd1: Rows(A)<N", _state);
50062     ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateAdd1: Cols(A)<N", _state);
50063     ae_assert(u->cnt>=n, "SPDMatrixCholeskyUpdateAdd1: Length(U)<N", _state);
50064     spdmatrixcholeskyupdateadd1buf(a, n, isupper, u, &bufr, _state);
50065     ae_frame_leave(_state);
50066 }
50067 
50068 
50069 /*************************************************************************
50070 Update of Cholesky decomposition: "fixing" some variables.
50071 
50072 This function uses internally allocated buffer which is not saved  between
50073 subsequent  calls.  So,  if  you  perform  a lot  of  subsequent  updates,
50074 we  recommend   you   to   use   "buffered"   version   of  this function:
50075 SPDMatrixCholeskyUpdateFixBuf().
50076 
50077 "FIXING" EXPLAINED:
50078 
50079     Suppose we have N*N positive definite matrix A. "Fixing" some variable
50080     means filling corresponding row/column of  A  by  zeros,  and  setting
50081     diagonal element to 1.
50082 
50083     For example, if we fix 2nd variable in 4*4 matrix A, it becomes Af:
50084 
50085         ( A00  A01  A02  A03 )      ( Af00  0   Af02 Af03 )
50086         ( A10  A11  A12  A13 )      (  0    1    0    0   )
50087         ( A20  A21  A22  A23 )  =>  ( Af20  0   Af22 Af23 )
50088         ( A30  A31  A32  A33 )      ( Af30  0   Af32 Af33 )
50089 
50090     If we have Cholesky decomposition of A, it must be recalculated  after
50091     variables were  fixed.  However,  it  is  possible  to  use  efficient
50092     algorithm, which needs O(K*N^2)  time  to  "fix"  K  variables,  given
50093     Cholesky decomposition of original, "unfixed" A.
50094 
50095 INPUT PARAMETERS:
50096     A       -   upper or lower Cholesky factor.
50097                 array with elements [0..N-1, 0..N-1].
50098                 Exception is thrown if array size is too small.
50099     N       -   size of matrix A, N>0
50100     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50101                 otherwise A contains a lower one.
50102     Fix     -   array[N], I-th element is True if I-th  variable  must  be
50103                 fixed. Exception is thrown if array size is too small.
50104     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50105                 needed. It is recommended to  reuse  this  buffer  if  you
50106                 perform a lot of subsequent decompositions.
50107 
50108 OUTPUT PARAMETERS:
50109     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50110                 triangle contains matrix U, and the elements below the main
50111                 diagonal are not modified. Similarly, if IsUpper = False.
50112 
50113 NOTE: this function always succeeds, so it does not return completion code
50114 
50115 NOTE: this function checks sizes of input arrays, but it does  NOT  checks
50116       for presence of infinities or NAN's.
50117 
50118 NOTE: this  function  is  efficient  only  for  moderate amount of updated
50119       variables - say, 0.1*N or 0.3*N. For larger amount of  variables  it
50120       will  still  work,  but  you  may  get   better   performance   with
50121       straightforward Cholesky.
50122 
50123   -- ALGLIB --
50124      03.02.2014
50125      Sergey Bochkanov
50126 *************************************************************************/
spdmatrixcholeskyupdatefix(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * fix,ae_state * _state)50127 void spdmatrixcholeskyupdatefix(/* Real    */ ae_matrix* a,
50128      ae_int_t n,
50129      ae_bool isupper,
50130      /* Boolean */ ae_vector* fix,
50131      ae_state *_state)
50132 {
50133     ae_frame _frame_block;
50134     ae_vector bufr;
50135 
50136     ae_frame_make(_state, &_frame_block);
50137     memset(&bufr, 0, sizeof(bufr));
50138     ae_vector_init(&bufr, 0, DT_REAL, _state, ae_true);
50139 
50140     ae_assert(n>0, "SPDMatrixCholeskyUpdateFix: N<=0", _state);
50141     ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateFix: Rows(A)<N", _state);
50142     ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateFix: Cols(A)<N", _state);
50143     ae_assert(fix->cnt>=n, "SPDMatrixCholeskyUpdateFix: Length(Fix)<N", _state);
50144     spdmatrixcholeskyupdatefixbuf(a, n, isupper, fix, &bufr, _state);
50145     ae_frame_leave(_state);
50146 }
50147 
50148 
50149 /*************************************************************************
50150 Update of Cholesky decomposition: rank-1 update to original A.  "Buffered"
50151 version which uses preallocated buffer which is saved  between  subsequent
50152 function calls.
50153 
50154 See comments for SPDMatrixCholeskyUpdateAdd1() for more information.
50155 
50156 INPUT PARAMETERS:
50157     A       -   upper or lower Cholesky factor.
50158                 array with elements [0..N-1, 0..N-1].
50159                 Exception is thrown if array size is too small.
50160     N       -   size of matrix A, N>0
50161     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50162                 otherwise A contains a lower one.
50163     U       -   array[N], rank-1 update to A: A_mod = A + u*u'
50164                 Exception is thrown if array size is too small.
50165     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50166                 needed. It is recommended to  reuse  this  buffer  if  you
50167                 perform a lot of subsequent decompositions.
50168 
50169 OUTPUT PARAMETERS:
50170     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50171                 triangle contains matrix U, and the elements below the main
50172                 diagonal are not modified. Similarly, if IsUpper = False.
50173 
50174   -- ALGLIB --
50175      03.02.2014
50176      Sergey Bochkanov
50177 *************************************************************************/
void spdmatrixcholeskyupdateadd1buf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Real    */ ae_vector* u,
     /* Real    */ ae_vector* bufr,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t nz;           /* index of first non-zero entry of U (N if U is all-zero) */
    double cs;             /* cosine of the current Givens rotation */
    double sn;             /* sine of the current Givens rotation */
    double v;
    double vv;


    ae_assert(n>0, "SPDMatrixCholeskyUpdateAdd1Buf: N<=0", _state);
    ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Rows(A)<N", _state);
    ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Cols(A)<N", _state);
    ae_assert(u->cnt>=n, "SPDMatrixCholeskyUpdateAdd1Buf: Length(U)<N", _state);

    /*
     * Find index of first non-zero entry in U.
     * Leading zeros in U leave the first NZ rows/columns of the factor
     * untouched, so the update can start at row NZ.
     */
    nz = n;
    for(i=0; i<=n-1; i++)
    {
        if( ae_fp_neq(u->ptr.p_double[i],(double)(0)) )
        {
            nz = i;
            break;
        }
    }
    if( nz==n )
    {

        /*
         * Nothing to update
         */
        return;
    }

    /*
     * If working with upper triangular matrix
     */
    if( isupper )
    {

        /*
         * Perform a sequence of updates which fix variables one by one.
         * This approach is different from one which is used when we work
         * with lower triangular matrix.
         *
         * BufR[NZ..N-1] holds the not-yet-absorbed remainder of the update
         * vector; each non-zero entry is annihilated by a Givens rotation
         * applied to row I of the factor and to the remainder itself.
         */
        rvectorsetlengthatleast(bufr, n, _state);
        for(j=nz; j<=n-1; j++)
        {
            bufr->ptr.p_double[j] = u->ptr.p_double[j];
        }
        for(i=nz; i<=n-1; i++)
        {
            if( ae_fp_neq(bufr->ptr.p_double[i],(double)(0)) )
            {
                /* rotation zeroes BufR[I] against the diagonal element A[I][I] */
                generaterotation(a->ptr.pp_double[i][i], bufr->ptr.p_double[i], &cs, &sn, &v, _state);
                a->ptr.pp_double[i][i] = v;
                bufr->ptr.p_double[i] = 0.0;
                for(j=i+1; j<=n-1; j++)
                {
                    v = a->ptr.pp_double[i][j];
                    vv = bufr->ptr.p_double[j];
                    a->ptr.pp_double[i][j] = cs*v+sn*vv;
                    bufr->ptr.p_double[j] = -sn*v+cs*vv;
                }
            }
        }
    }
    else
    {

        /*
         * Calculate rows of modified Cholesky factor, row-by-row
         * (updates performed during variable fixing are applied
         * simultaneously to each row)
         *
         * Buffer layout: BufR[0..N-1] is the update vector remainder,
         * BufR[N+2*J+0]/BufR[N+2*J+1] store the (cos,sin) pair of the
         * rotation generated at row J, replayed on every later row.
         */
        rvectorsetlengthatleast(bufr, 3*n, _state);
        for(j=nz; j<=n-1; j++)
        {
            bufr->ptr.p_double[j] = u->ptr.p_double[j];
        }
        for(i=nz; i<=n-1; i++)
        {

            /*
             * Update all previous updates [Idx+1...I-1] to I-th row
             */
            vv = bufr->ptr.p_double[i];
            for(j=nz; j<=i-1; j++)
            {
                cs = bufr->ptr.p_double[n+2*j+0];
                sn = bufr->ptr.p_double[n+2*j+1];
                v = a->ptr.pp_double[i][j];
                a->ptr.pp_double[i][j] = cs*v+sn*vv;
                vv = -sn*v+cs*vv;
            }

            /*
             * generate rotation applied to I-th element of update vector
             */
            generaterotation(a->ptr.pp_double[i][i], vv, &cs, &sn, &v, _state);
            a->ptr.pp_double[i][i] = v;
            bufr->ptr.p_double[n+2*i+0] = cs;
            bufr->ptr.p_double[n+2*i+1] = sn;
        }
    }
}
50292 
50293 
50294 /*************************************************************************
50295 Update of Cholesky  decomposition:  "fixing"  some  variables.  "Buffered"
50296 version which uses preallocated buffer which is saved  between  subsequent
50297 function calls.
50298 
50299 See comments for SPDMatrixCholeskyUpdateFix() for more information.
50300 
50301 INPUT PARAMETERS:
50302     A       -   upper or lower Cholesky factor.
50303                 array with elements [0..N-1, 0..N-1].
50304                 Exception is thrown if array size is too small.
50305     N       -   size of matrix A, N>0
50306     IsUpper -   if IsUpper=True, then A contains  upper  Cholesky  factor;
50307                 otherwise A contains a lower one.
50308     Fix     -   array[N], I-th element is True if I-th  variable  must  be
50309                 fixed. Exception is thrown if array size is too small.
50310     BufR    -   possibly preallocated  buffer;  automatically  resized  if
50311                 needed. It is recommended to  reuse  this  buffer  if  you
50312                 perform a lot of subsequent decompositions.
50313 
50314 OUTPUT PARAMETERS:
50315     A       -   updated factorization.  If  IsUpper=True,  then  the  upper
50316                 triangle contains matrix U, and the elements below the main
50317                 diagonal are not modified. Similarly, if IsUpper = False.
50318 
50319   -- ALGLIB --
50320      03.02.2014
50321      Sergey Bochkanov
50322 *************************************************************************/
void spdmatrixcholeskyupdatefixbuf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     /* Boolean */ ae_vector* fix,
     /* Real    */ ae_vector* bufr,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t nfix;         /* number of variables flagged for fixing */
    ae_int_t idx;          /* index of the variable currently being fixed */
    double cs;             /* cosine of the current Givens rotation */
    double sn;             /* sine of the current Givens rotation */
    double v;
    double vv;


    ae_assert(n>0, "SPDMatrixCholeskyUpdateFixBuf: N<=0", _state);
    ae_assert(a->rows>=n, "SPDMatrixCholeskyUpdateFixBuf: Rows(A)<N", _state);
    ae_assert(a->cols>=n, "SPDMatrixCholeskyUpdateFixBuf: Cols(A)<N", _state);
    ae_assert(fix->cnt>=n, "SPDMatrixCholeskyUpdateFixBuf: Length(Fix)<N", _state);

    /*
     * Count number of variables to fix.
     * Quick exit if NFix=0 or NFix=N
     */
    nfix = 0;
    for(i=0; i<=n-1; i++)
    {
        if( fix->ptr.p_bool[i] )
        {
            inc(&nfix, _state);
        }
    }
    if( nfix==0 )
    {

        /*
         * Nothing to fix
         */
        return;
    }
    if( nfix==n )
    {

        /*
         * All variables are fixed.
         * Set A to identity and exit.
         * (Only the selected triangle is written; the other one is untouched.)
         */
        if( isupper )
        {
            for(i=0; i<=n-1; i++)
            {
                a->ptr.pp_double[i][i] = (double)(1);
                for(j=i+1; j<=n-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
                a->ptr.pp_double[i][i] = (double)(1);
            }
        }
        return;
    }

    /*
     * If working with upper triangular matrix
     */
    if( isupper )
    {

        /*
         * Perform a sequence of updates which fix variables one by one.
         * This approach is different from one which is used when we work
         * with lower triangular matrix.
         */
        rvectorsetlengthatleast(bufr, n, _state);
        for(k=0; k<=n-1; k++)
        {
            if( fix->ptr.p_bool[k] )
            {
                idx = k;

                /*
                 * Quick exit if it is last variable
                 */
                if( idx==n-1 )
                {
                    for(i=0; i<=idx-1; i++)
                    {
                        a->ptr.pp_double[i][idx] = 0.0;
                    }
                    a->ptr.pp_double[idx][idx] = 1.0;
                    continue;
                }

                /*
                 * We have Cholesky decomposition of quadratic term in A,
                 * with upper triangle being stored as given below:
                 *
                 *         ( U00 u01 U02 )
                 *     U = (     u11 u12 )
                 *         (         U22 )
                 *
                 * Here u11 is diagonal element corresponding to variable K. We
                 * want to fix this variable, and we do so by modifying U as follows:
                 *
                 *             ( U00  0  U02 )
                 *     U_mod = (      1   0  )
                 *             (         U_m )
                 *
                 * with U_m = CHOLESKY [ (U22^T)*U22 + (u12^T)*u12 ]
                 *
                 * Of course, we can calculate U_m by calculating (U22^T)*U22 explicitly,
                 * modifying it and performing Cholesky decomposition of modified matrix.
                 * However, we can treat it as follows:
                 * * we already have CHOLESKY[(U22^T)*U22], which is equal to U22
                 * * we have rank-1 update (u12^T)*u12 applied to (U22^T)*U22
                 * * thus, we can calculate updated Cholesky with O(N^2) algorithm
                 *   instead of O(N^3) one
                 */
                for(j=idx+1; j<=n-1; j++)
                {
                    /* save u12 before the row/column of A is zeroed out */
                    bufr->ptr.p_double[j] = a->ptr.pp_double[idx][j];
                }
                for(i=0; i<=idx-1; i++)
                {
                    a->ptr.pp_double[i][idx] = 0.0;
                }
                a->ptr.pp_double[idx][idx] = 1.0;
                for(i=idx+1; i<=n-1; i++)
                {
                    a->ptr.pp_double[idx][i] = 0.0;
                }
                /* rank-1 update of trailing block via Givens rotations */
                for(i=idx+1; i<=n-1; i++)
                {
                    if( ae_fp_neq(bufr->ptr.p_double[i],(double)(0)) )
                    {
                        generaterotation(a->ptr.pp_double[i][i], bufr->ptr.p_double[i], &cs, &sn, &v, _state);
                        a->ptr.pp_double[i][i] = v;
                        bufr->ptr.p_double[i] = 0.0;
                        for(j=i+1; j<=n-1; j++)
                        {
                            v = a->ptr.pp_double[i][j];
                            vv = bufr->ptr.p_double[j];
                            a->ptr.pp_double[i][j] = cs*v+sn*vv;
                            bufr->ptr.p_double[j] = -sn*v+cs*vv;
                        }
                    }
                }
            }
        }
    }
    else
    {

        /*
         * Calculate rows of modified Cholesky factor, row-by-row
         * (updates performed during variable fixing are applied
         * simultaneously to each row)
         *
         * Buffer layout: BufR[0..N-1] is the saved column of A,
         * BufR[N+2*J+0]/BufR[N+2*J+1] store the (cos,sin) pair of the
         * rotation generated at row J, replayed on every later row.
         */
        rvectorsetlengthatleast(bufr, 3*n, _state);
        for(k=0; k<=n-1; k++)
        {
            if( fix->ptr.p_bool[k] )
            {
                idx = k;

                /*
                 * Quick exit if it is last variable
                 */
                if( idx==n-1 )
                {
                    for(i=0; i<=idx-1; i++)
                    {
                        a->ptr.pp_double[idx][i] = 0.0;
                    }
                    a->ptr.pp_double[idx][idx] = 1.0;
                    continue;
                }

                /*
                 * store column to buffer and clear row/column of A
                 */
                for(j=idx+1; j<=n-1; j++)
                {
                    bufr->ptr.p_double[j] = a->ptr.pp_double[j][idx];
                }
                for(i=0; i<=idx-1; i++)
                {
                    a->ptr.pp_double[idx][i] = 0.0;
                }
                a->ptr.pp_double[idx][idx] = 1.0;
                for(i=idx+1; i<=n-1; i++)
                {
                    a->ptr.pp_double[i][idx] = 0.0;
                }

                /*
                 * Apply update to rows of A
                 */
                for(i=idx+1; i<=n-1; i++)
                {

                    /*
                     * Update all previous updates [Idx+1...I-1] to I-th row
                     */
                    vv = bufr->ptr.p_double[i];
                    for(j=idx+1; j<=i-1; j++)
                    {
                        cs = bufr->ptr.p_double[n+2*j+0];
                        sn = bufr->ptr.p_double[n+2*j+1];
                        v = a->ptr.pp_double[i][j];
                        a->ptr.pp_double[i][j] = cs*v+sn*vv;
                        vv = -sn*v+cs*vv;
                    }

                    /*
                     * generate rotation applied to I-th element of update vector
                     */
                    generaterotation(a->ptr.pp_double[i][i], vv, &cs, &sn, &v, _state);
                    a->ptr.pp_double[i][i] = v;
                    bufr->ptr.p_double[n+2*i+0] = cs;
                    bufr->ptr.p_double[n+2*i+1] = sn;
                }
            }
        }
    }
}
50562 
50563 
50564 /*************************************************************************
50565 Sparse LU decomposition with column pivoting for sparsity and row pivoting
50566 for stability. Input must be square sparse matrix stored in CRS format.
50567 
50568 The algorithm  computes  LU  decomposition  of  a  general  square  matrix
50569 (rectangular ones are not supported). The result  of  an  algorithm  is  a
50570 representation of A as A = P*L*U*Q, where:
50571 * L is lower unitriangular matrix
50572 * U is upper triangular matrix
50573 * P = P0*P1*...*PK, K=N-1, Pi - permutation matrix for I and P[I]
50574 * Q = QK*...*Q1*Q0, K=N-1, Qi - permutation matrix for I and Q[I]
50575 
50576 This function pivots columns for higher sparsity, and then pivots rows for
50577 stability (larger element at the diagonal).
50578 
50579 INPUT PARAMETERS:
50580     A       -   sparse NxN matrix in CRS format. An exception is generated
50581                 if matrix is non-CRS or non-square.
50582     PivotType-  pivoting strategy:
50583                 * 0 for best pivoting available (2 in current version)
50584                 * 1 for row-only pivoting (NOT RECOMMENDED)
50585                 * 2 for complete pivoting which produces most sparse outputs
50586 
50587 OUTPUT PARAMETERS:
50588     A       -   the result of factorization, matrices L and U stored in
50589                 compact form using CRS sparse storage format:
50590                 * lower unitriangular L is stored strictly under main diagonal
                * upper triangular U is stored ON and ABOVE main diagonal
50592     P       -   row permutation matrix in compact form, array[N]
50593     Q       -   col permutation matrix in compact form, array[N]
50594 
50595 This function always succeeds, i.e. it ALWAYS returns valid factorization,
50596 but for your convenience it also returns  boolean  value  which  helps  to
50597 detect symbolically degenerate matrices:
50598 * function returns TRUE, if the matrix was factorized AND symbolically
50599   non-degenerate
50600 * function returns FALSE, if the matrix was factorized but U has strictly
50601   zero elements at the diagonal (the factorization is returned anyway).
50602 
50603 
50604   -- ALGLIB routine --
50605      03.09.2018
50606      Bochkanov Sergey
50607 *************************************************************************/
sparselu(sparsematrix * a,ae_int_t pivottype,ae_vector * p,ae_vector * q,ae_state * _state)50608 ae_bool sparselu(sparsematrix* a,
50609      ae_int_t pivottype,
50610      /* Integer */ ae_vector* p,
50611      /* Integer */ ae_vector* q,
50612      ae_state *_state)
50613 {
50614     ae_frame _frame_block;
50615     sluv2buffer buf2;
50616     ae_bool result;
50617 
50618     ae_frame_make(_state, &_frame_block);
50619     memset(&buf2, 0, sizeof(buf2));
50620     ae_vector_clear(p);
50621     ae_vector_clear(q);
50622     _sluv2buffer_init(&buf2, _state, ae_true);
50623 
50624     ae_assert((pivottype==0||pivottype==1)||pivottype==2, "SparseLU: unexpected pivot type", _state);
50625     ae_assert(sparseiscrs(a, _state), "SparseLU: A is not stored in CRS format", _state);
50626     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseLU: non-square A", _state);
50627     result = sptrflu(a, pivottype, p, q, &buf2, _state);
50628     ae_frame_leave(_state);
50629     return result;
50630 }
50631 
50632 
50633 /*************************************************************************
50634 Sparse Cholesky decomposition for skyline matrix using in-place algorithm
50635 without allocating additional storage.
50636 
50637 The algorithm computes Cholesky decomposition  of  a  symmetric  positive-
50638 definite sparse matrix. The result of an algorithm is a representation  of
50639 A as A=U^T*U or A=L*L^T
50640 
50641 This function allows to perform very efficient decomposition of low-profile
50642 matrices (average bandwidth is ~5-10 elements). For larger matrices it  is
50643 recommended to use supernodal Cholesky decomposition: SparseCholeskyP() or
50644 SparseCholeskyAnalyze()/SparseCholeskyFactorize().
50645 
50646 INPUT PARAMETERS:
50647     A       -   sparse matrix in skyline storage (SKS) format.
50648     N       -   size of matrix A (can be smaller than actual size of A)
50649     IsUpper -   if IsUpper=True, then factorization is performed on  upper
50650                 triangle. Another triangle is ignored (it may contain some
50651                 data, but it is not changed).
50652 
50653 
50654 OUTPUT PARAMETERS:
50655     A       -   the result of factorization, stored in SKS. If IsUpper=True,
50656                 then the upper  triangle  contains  matrix  U,  such  that
50657                 A = U^T*U. Lower triangle is not changed.
50658                 Similarly, if IsUpper = False. In this case L is returned,
50659                 and we have A = L*(L^T).
50660                 Note that THIS function does not  perform  permutation  of
50661                 rows to reduce bandwidth.
50662 
50663 RESULT:
50664     If  the  matrix  is  positive-definite,  the  function  returns  True.
50665     Otherwise, the function returns False. Contents of A is not determined
50666     in such case.
50667 
50668 NOTE: for  performance  reasons  this  function  does NOT check that input
50669       matrix  includes  only  finite  values. It is your responsibility to
50670       make sure that there are no infinite or NAN values in the matrix.
50671 
50672   -- ALGLIB routine --
50673      16.01.2014
50674      Bochkanov Sergey
50675 *************************************************************************/
ae_bool sparsecholeskyskyline(sparsematrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t jnz;           /* first column where both A and L are nonzero   */
    ae_int_t jnza;          /* first nonzero column of the row of A          */
    ae_int_t jnzl;          /* first nonzero column of the row of L          */
    double v;
    double vv;
    double a12;             /* squared norm of the freshly computed A1 row   */
    ae_int_t nready;        /* size of the already-factorized leading block  */
    ae_int_t nadd;          /* rows added per iteration (always 1 here)      */
    ae_int_t banda;         /* skyline bandwidth of the current row          */
    ae_int_t offsa;         /* offset of A[JNZ] in A.Vals                    */
    ae_int_t offsl;         /* offset of L[I,JNZ] in A.Vals                  */
    ae_bool result;


    ae_assert(n>=0, "SparseCholeskySkyline: N<0", _state);
    ae_assert(sparsegetnrows(a, _state)>=n, "SparseCholeskySkyline: rows(A)<N", _state);
    ae_assert(sparsegetncols(a, _state)>=n, "SparseCholeskySkyline: cols(A)<N", _state);
    ae_assert(sparseissks(a, _state), "SparseCholeskySkyline: A is not stored in SKS format", _state);
    result = ae_false;

    /*
     * transpose if needed
     *
     * The core loop below works with the lower triangle; when the caller
     * requested the upper one, transpose in place first (and once more on
     * the way out).
     */
    if( isupper )
    {
        sparsetransposesks(a, _state);
    }

    /*
     * Perform Cholesky decomposition:
     * * we assume that leading NReady*NReady submatrix is done
     * * having Cholesky decomposition of NReady*NReady submatrix we
     *   obtain decomposition of larger (NReady+NAdd)*(NReady+NAdd) one.
     *
     * Here is algorithm. At the start we have
     *
     *     (      |   )
     *     (  L   |   )
     * S = (      |   )
     *     (----------)
     *     (  A   | B )
     *
     * with L being already computed Cholesky factor, A and B being
     * unprocessed parts of the matrix. Of course, L/A/B are stored
     * in SKS format.
     *
     * Then, we calculate A1:=(inv(L)*A')' and replace A with A1.
     * Then, we calculate B1:=B-A1*A1'     and replace B with B1
     *
     * Finally, we calculate small NAdd*NAdd Cholesky of B1 with
     * dense solver. Now, L/A1/B1 are Cholesky decomposition of the
     * larger (NReady+NAdd)*(NReady+NAdd) matrix.
     */
    nready = 0;
    nadd = 1;
    while(nready<n)
    {
        ae_assert(nadd==1, "SkylineCholesky: internal error", _state);

        /*
         * Calculate A1:=(inv(L)*A')'
         *
         * Elements are calculated row by row (example below is given
         * for NAdd=1):
         * * first, we solve L[0,0]*A1[0]=A[0]
         * * then, we solve  L[1,0]*A1[0]+L[1,1]*A1[1]=A[1]
         * * then, we move to next row and so on
         * * during calculation of A1 we update A12 - squared norm of A1
         *
         * We extensively use sparsity of both A/A1 and L:
         * * first, equations from 0 to BANDWIDTH(A1)-1 are completely zero
         * * second, for I>=BANDWIDTH(A1), I-th equation is reduced from
         *     L[I,0]*A1[0] + L[I,1]*A1[1] + ... + L[I,I]*A1[I] = A[I]
         *   to
         *     L[I,JNZ]*A1[JNZ] + ... + L[I,I]*A1[I] = A[I]
         *   where JNZ = max(NReady-BANDWIDTH(A1),I-BANDWIDTH(L[i]))
         *   (JNZ is an index of the first column where both A and L become
         *   nonzero).
         *
         * NOTE: we rely on details of SparseMatrix internal storage format.
         *       This is allowed by SparseMatrix specification.
         */
        a12 = 0.0;
        if( a->didx.ptr.p_int[nready]>0 )
        {
            banda = a->didx.ptr.p_int[nready];
            for(i=nready-banda; i<=nready-1; i++)
            {

                /*
                 * Elements of A1[0:I-1] were computed:
                 * * A1[0:NReady-BandA-1] are zero (sparse)
                 * * A1[NReady-BandA:I-1] replaced corresponding elements of A
                 *
                 * Now it is time to get I-th one.
                 *
                 * First, we calculate:
                 * * JNZA  - index of the first column where A become nonzero
                 * * JNZL  - index of the first column where L become nonzero
                 * * JNZ   - index of the first column where both A and L become nonzero
                 * * OffsA - offset of A[JNZ] in A.Vals
                 * * OffsL - offset of L[I,JNZ] in A.Vals
                 *
                 * Then, we solve SUM(A1[j]*L[I,j],j=JNZ..I-1) + A1[I]*L[I,I] = A[I],
                 * with A1[JNZ..I-1] already known, and A1[I] unknown.
                 */
                jnza = nready-banda;
                jnzl = i-a->didx.ptr.p_int[i];
                jnz = ae_maxint(jnza, jnzl, _state);
                offsa = a->ridx.ptr.p_int[nready]+(jnz-jnza);
                offsl = a->ridx.ptr.p_int[i]+(jnz-jnzl);
                v = 0.0;
                k = i-1-jnz;
                for(j=0; j<=k; j++)
                {
                    v = v+a->vals.ptr.p_double[offsa+j]*a->vals.ptr.p_double[offsl+j];
                }
                /* forward substitution step: A1[I]=(A[I]-dot)/L[I,I],
                 * stored in place of A[I]; accumulate |A1|^2 in A12 */
                vv = (a->vals.ptr.p_double[offsa+k+1]-v)/a->vals.ptr.p_double[offsl+k+1];
                a->vals.ptr.p_double[offsa+k+1] = vv;
                a12 = a12+vv*vv;
            }
        }

        /*
         * Calculate CHOLESKY(B-A1*A1')
         */
        offsa = a->ridx.ptr.p_int[nready]+a->didx.ptr.p_int[nready];
        v = a->vals.ptr.p_double[offsa];
        if( ae_fp_less_eq(v,a12) )
        {

            /*
             * Non-positive pivot: the matrix is not positive definite.
             * NOTE: we return immediately, so for IsUpper=True the matrix is
             *       left transposed here; the contract above declares the
             *       contents of A undefined on failure, so this is fine.
             */
            result = ae_false;
            return result;
        }
        a->vals.ptr.p_double[offsa] = ae_sqrt(v-a12, _state);

        /*
         * Increase size of the updated matrix
         */
        inc(&nready, _state);
    }

    /*
     * transpose if needed
     */
    if( isupper )
    {
        sparsetransposesks(a, _state);
    }
    result = ae_true;
    return result;
}
50835 
50836 
50837 /*************************************************************************
50838 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage,
50839 without rows/cols permutation.
50840 
50841 This function is the most convenient (less parameters to specify), although
50842 less efficient, version of sparse Cholesky.
50843 
50844 Internally it:
50845 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
50846   phase with no permutation being configured.
50847 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
50848   the factorization
50849 
50850 Following alternatives may result in better performance:
50851 * using SparseCholeskyP(), which selects best  pivoting  available,  which
50852   almost always results in improved sparsity and cache locality
50853 * using  SparseCholeskyAnalyze() and SparseCholeskyFactorize()   functions
50854   directly,  which  may  improve  performance of repetitive factorizations
50855   with same sparsity patterns.
50856 
50857 The latter also allows one to perform  LDLT  factorization  of  indefinite
50858 matrix (one with strictly diagonal D, which is known  to  be  stable  only
50859 in few special cases, like quasi-definite matrices).
50860 
50861 INPUT PARAMETERS:
50862     A       -   a square NxN sparse matrix, stored in any storage format.
50863     IsUpper -   if IsUpper=True, then factorization is performed on  upper
50864                 triangle.  Another triangle is ignored on  input,  dropped
50865                 on output. Similarly, if IsUpper=False, the lower triangle
50866                 is processed.
50867 
50868 OUTPUT PARAMETERS:
50869     A       -   the result of factorization, stored in CRS format:
50870                 * if IsUpper=True, then the upper triangle contains matrix
50871                   U such  that  A = U^T*U and the lower triangle is empty.
50872                 * similarly, if IsUpper=False, then lower triangular L  is
50873                   returned and we have A = L*(L^T).
50874                 Note that THIS function does not  perform  permutation  of
50875                 the rows to reduce fill-in.
50876 
50877 RESULT:
50878     If  the  matrix  is  positive-definite,  the  function  returns  True.
50879     Otherwise, the function returns False.  Contents  of  A  is  undefined
50880     in such case.
50881 
50882 NOTE: for  performance  reasons  this  function  does NOT check that input
50883       matrix  includes  only  finite  values. It is your responsibility to
50884       make sure that there are no infinite or NAN values in the matrix.
50885 
50886   -- ALGLIB routine --
50887      16.09.2020
50888      Bochkanov Sergey
50889 *************************************************************************/
sparsecholesky(sparsematrix * a,ae_bool isupper,ae_state * _state)50890 ae_bool sparsecholesky(sparsematrix* a, ae_bool isupper, ae_state *_state)
50891 {
50892     ae_frame _frame_block;
50893     sparsedecompositionanalysis analysis;
50894     ae_int_t facttype;
50895     ae_int_t permtype;
50896     ae_vector dummyd;
50897     ae_vector dummyp;
50898     ae_bool result;
50899 
50900     ae_frame_make(_state, &_frame_block);
50901     memset(&analysis, 0, sizeof(analysis));
50902     memset(&dummyd, 0, sizeof(dummyd));
50903     memset(&dummyp, 0, sizeof(dummyp));
50904     _sparsedecompositionanalysis_init(&analysis, _state, ae_true);
50905     ae_vector_init(&dummyd, 0, DT_REAL, _state, ae_true);
50906     ae_vector_init(&dummyp, 0, DT_INT, _state, ae_true);
50907 
50908     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholesky: A is not square", _state);
50909 
50910     /*
50911      * Quick exit
50912      */
50913     if( sparsegetnrows(a, _state)==0 )
50914     {
50915         result = ae_true;
50916         ae_frame_leave(_state);
50917         return result;
50918     }
50919 
50920     /*
50921      * Choose factorization and permutation: vanilla Cholesky and no permutation
50922      */
50923     facttype = 0;
50924     permtype = -1;
50925 
50926     /*
50927      * Easy case - CRS matrix in lower triangle, no conversion or transposition is needed
50928      */
50929     if( sparseiscrs(a, _state)&&!isupper )
50930     {
50931         result = spsymmanalyze(a, facttype, permtype, &analysis.analysis, _state);
50932         if( !result )
50933         {
50934             ae_frame_leave(_state);
50935             return result;
50936         }
50937         result = spsymmfactorize(&analysis.analysis, _state);
50938         if( !result )
50939         {
50940             ae_frame_leave(_state);
50941             return result;
50942         }
50943         spsymmextract(&analysis.analysis, a, &dummyd, &dummyp, _state);
50944         ae_frame_leave(_state);
50945         return result;
50946     }
50947 
50948     /*
50949      * A bit more complex - we need conversion and/or transposition
50950      */
50951     if( isupper )
50952     {
50953         sparsecopytocrsbuf(a, &analysis.wrkat, _state);
50954         sparsecopytransposecrsbuf(&analysis.wrkat, &analysis.wrka, _state);
50955     }
50956     else
50957     {
50958         sparsecopytocrsbuf(a, &analysis.wrka, _state);
50959     }
50960     result = spsymmanalyze(&analysis.wrka, facttype, permtype, &analysis.analysis, _state);
50961     if( !result )
50962     {
50963         ae_frame_leave(_state);
50964         return result;
50965     }
50966     result = spsymmfactorize(&analysis.analysis, _state);
50967     if( !result )
50968     {
50969         ae_frame_leave(_state);
50970         return result;
50971     }
50972     spsymmextract(&analysis.analysis, &analysis.wrka, &dummyd, &dummyp, _state);
50973     if( isupper )
50974     {
50975         sparsecopytransposecrsbuf(&analysis.wrka, a, _state);
50976     }
50977     else
50978     {
50979         sparsecopybuf(&analysis.wrka, a, _state);
50980     }
50981     ae_frame_leave(_state);
50982     return result;
50983 }
50984 
50985 
50986 /*************************************************************************
50987 Sparse Cholesky decomposition for a matrix  stored  in  any sparse storage
50988 format, with performance-enhancing permutation of rows/cols.
50989 
50990 Present version is configured to perform a supernodal permutation combined
50991 with a sparsity-reducing ordering.
50992 
50993 This function is a wrapper around generic sparse  decomposition  functions
50994 that internally:
50995 * calls SparseCholeskyAnalyze()  function  to  perform  symbolic  analysis
50996   phase with best available permutation being configured.
50997 * calls SparseCholeskyFactorize() function to perform numerical  phase  of
50998   the factorization.
50999 
51000 NOTE: using  SparseCholeskyAnalyze() and SparseCholeskyFactorize() directly
51001       may improve  performance  of  repetitive  factorizations  with  same
51002       sparsity patterns. It also allows one to perform  LDLT factorization
51003       of  indefinite  matrix  -  a factorization with strictly diagonal D,
51004       which  is  known to be stable only in few special cases, like quasi-
51005       definite matrices.
51006 
51007 INPUT PARAMETERS:
51008     A       -   a square NxN sparse matrix, stored in any storage format.
51009     IsUpper -   if IsUpper=True, then factorization is performed on  upper
51010                 triangle.  Another triangle is ignored on  input,  dropped
51011                 on output. Similarly, if IsUpper=False, the lower triangle
51012                 is processed.
51013 
51014 OUTPUT PARAMETERS:
51015     A       -   the result of factorization, stored in CRS format:
51016                 * if IsUpper=True, then the upper triangle contains matrix
51017                   U such  that  A = U^T*U and the lower triangle is empty.
51018                 * similarly, if IsUpper=False, then lower triangular L  is
51019                   returned and we have A = L*(L^T).
51020     P       -   a row/column permutation, a product of P0*P1*...*Pk, k=N-1,
51021                 with Pi being permutation of rows/cols I and P[I]
51022 
51023 RESULT:
51024     If  the  matrix  is  positive-definite,  the  function  returns  True.
51025     Otherwise, the function returns False.  Contents  of  A  is  undefined
51026     in such case.
51027 
51028 NOTE: for  performance  reasons  this  function  does NOT check that input
51029       matrix  includes  only  finite  values. It is your responsibility to
51030       make sure that there are no infinite or NAN values in the matrix.
51031 
51032   -- ALGLIB routine --
51033      16.09.2020
51034      Bochkanov Sergey
51035 *************************************************************************/
sparsecholeskyp(sparsematrix * a,ae_bool isupper,ae_vector * p,ae_state * _state)51036 ae_bool sparsecholeskyp(sparsematrix* a,
51037      ae_bool isupper,
51038      /* Integer */ ae_vector* p,
51039      ae_state *_state)
51040 {
51041     ae_frame _frame_block;
51042     sparsedecompositionanalysis analysis;
51043     ae_vector dummyd;
51044     ae_int_t facttype;
51045     ae_int_t permtype;
51046     ae_bool result;
51047 
51048     ae_frame_make(_state, &_frame_block);
51049     memset(&analysis, 0, sizeof(analysis));
51050     memset(&dummyd, 0, sizeof(dummyd));
51051     ae_vector_clear(p);
51052     _sparsedecompositionanalysis_init(&analysis, _state, ae_true);
51053     ae_vector_init(&dummyd, 0, DT_REAL, _state, ae_true);
51054 
51055     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholeskyP: A is not square", _state);
51056 
51057     /*
51058      * Quick exit
51059      */
51060     if( sparsegetnrows(a, _state)==0 )
51061     {
51062         result = ae_true;
51063         ae_frame_leave(_state);
51064         return result;
51065     }
51066 
51067     /*
51068      * Choose factorization and permutation: vanilla Cholesky and best permutation available
51069      */
51070     facttype = 0;
51071     permtype = 0;
51072 
51073     /*
51074      * Easy case - CRS matrix in lower triangle, no conversion or transposition is needed
51075      */
51076     if( sparseiscrs(a, _state)&&!isupper )
51077     {
51078         result = spsymmanalyze(a, facttype, permtype, &analysis.analysis, _state);
51079         if( !result )
51080         {
51081             ae_frame_leave(_state);
51082             return result;
51083         }
51084         result = spsymmfactorize(&analysis.analysis, _state);
51085         if( !result )
51086         {
51087             ae_frame_leave(_state);
51088             return result;
51089         }
51090         spsymmextract(&analysis.analysis, a, &dummyd, p, _state);
51091         ae_frame_leave(_state);
51092         return result;
51093     }
51094 
51095     /*
51096      * A bit more complex - we need conversion and/or transposition
51097      */
51098     if( isupper )
51099     {
51100         sparsecopytocrsbuf(a, &analysis.wrkat, _state);
51101         sparsecopytransposecrsbuf(&analysis.wrkat, &analysis.wrka, _state);
51102     }
51103     else
51104     {
51105         sparsecopytocrsbuf(a, &analysis.wrka, _state);
51106     }
51107     result = spsymmanalyze(&analysis.wrka, facttype, permtype, &analysis.analysis, _state);
51108     if( !result )
51109     {
51110         ae_frame_leave(_state);
51111         return result;
51112     }
51113     result = spsymmfactorize(&analysis.analysis, _state);
51114     if( !result )
51115     {
51116         ae_frame_leave(_state);
51117         return result;
51118     }
51119     spsymmextract(&analysis.analysis, &analysis.wrka, &dummyd, p, _state);
51120     if( isupper )
51121     {
51122         sparsecopytransposecrsbuf(&analysis.wrka, a, _state);
51123     }
51124     else
51125     {
51126         sparsecopybuf(&analysis.wrka, a, _state);
51127     }
51128     ae_frame_leave(_state);
51129     return result;
51130 }
51131 
51132 
51133 /*************************************************************************
51134 Sparse Cholesky/LDLT decomposition: symbolic analysis phase.
51135 
51136 This function is a part of the 'expert' sparse Cholesky API:
51137 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51138   matrix to be factorized into internal storage
51139 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51140   with lower bounds on pivot magnitudes and additional overflow safeguards
51141 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51142   precomputed symbolic analysis and internally stored matrix - and outputs
51143   result
51144 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51145   pattern into internal storage so  one  may  reuse  previously  allocated
51146   temporaries and previously performed symbolic analysis
51147 
51148 This specific function performs preliminary analysis of the  Cholesky/LDLT
51149 factorization. It allows to choose  different  permutation  types  and  to
51150 choose between classic Cholesky and  indefinite  LDLT  factorization  (the
51151 latter is computed with strictly diagonal D, i.e.  without  Bunch-Kaufman
51152 pivoting).
51153 
51154 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
51155       matrices. However, numerical stability is guaranteed ONLY for a class
51156       of quasi-definite matrices.
51157 
51158 NOTE: all internal processing is performed with lower triangular  matrices
51159       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
51160       triangular storage means  that  one  format  conversion  and/or  one
51161       transposition will be performed  internally  for  the  analysis  and
51162       factorization phases. Thus, highest  performance  is  achieved  when
51163       input is a lower triangular CRS matrix.
51164 
51165 INPUT PARAMETERS:
51166     A           -   sparse square matrix in any sparse storage format.
51167     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
51168                     other one is ignored).
51169     FactType    -   factorization type:
51170                     * 0 for traditional Cholesky of SPD matrix
51171                     * 1 for LDLT decomposition with strictly  diagonal  D,
51172                         which may have non-positive entries.
51173     PermType    -   permutation type:
51174                     *-1 for absence of permutation
51175                     * 0 for best fill-in reducing  permutation  available,
51176                         which is 3 in the current version
51177                     * 1 for supernodal ordering (improves locality and
51178                       performance, does NOT change fill-in factor)
51179                     * 2 for original AMD ordering
51180                     * 3 for  improved  AMD  (approximate  minimum  degree)
51181                         ordering with better  handling  of  matrices  with
51182                         dense rows/columns
51183 
51184 OUTPUT PARAMETERS:
51185     Analysis    -   contains:
51186                     * symbolic analysis of the matrix structure which will
51187                       be used later to guide numerical factorization.
51188                     * specific numeric values loaded into internal  memory
51189                       waiting for the factorization to be performed
51190 
51191 This function fails if and only if the matrix A is symbolically degenerate
51192 i.e. has diagonal element which is exactly zero. In  such  case  False  is
51193 returned, contents of Analysis object is undefined.
51194 
51195   -- ALGLIB routine --
51196      20.09.2020
51197      Bochkanov Sergey
51198 *************************************************************************/
sparsecholeskyanalyze(sparsematrix * a,ae_bool isupper,ae_int_t facttype,ae_int_t permtype,sparsedecompositionanalysis * analysis,ae_state * _state)51199 ae_bool sparsecholeskyanalyze(sparsematrix* a,
51200      ae_bool isupper,
51201      ae_int_t facttype,
51202      ae_int_t permtype,
51203      sparsedecompositionanalysis* analysis,
51204      ae_state *_state)
51205 {
51206     ae_bool result;
51207 
51208     _sparsedecompositionanalysis_clear(analysis);
51209 
51210     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholeskyAnalyze: A is not square", _state);
51211     ae_assert(facttype==0||facttype==1, "SparseCholeskyAnalyze: unexpected FactType", _state);
51212     ae_assert((((((permtype==0||permtype==1)||permtype==2)||permtype==3)||permtype==-1)||permtype==-2)||permtype==-3, "SparseCholeskyAnalyze: unexpected PermType", _state);
51213     analysis->n = sparsegetnrows(a, _state);
51214     analysis->facttype = facttype;
51215     analysis->permtype = permtype;
51216     if( !sparseiscrs(a, _state) )
51217     {
51218 
51219         /*
51220          * The matrix is stored in non-CRS format. First, we have to convert
51221          * it to CRS. Then we may need to transpose it in order to get lower
51222          * triangular one (as supported by SPSymmAnalyze).
51223          */
51224         sparsecopytocrs(a, &analysis->crsa, _state);
51225         if( isupper )
51226         {
51227             sparsecopytransposecrsbuf(&analysis->crsa, &analysis->crsat, _state);
51228             result = spsymmanalyze(&analysis->crsat, facttype, permtype, &analysis->analysis, _state);
51229         }
51230         else
51231         {
51232             result = spsymmanalyze(&analysis->crsa, facttype, permtype, &analysis->analysis, _state);
51233         }
51234     }
51235     else
51236     {
51237 
51238         /*
51239          * The matrix is stored in CRS format. However we may need to
51240          * transpose it in order to get lower triangular one (as supported
51241          * by SPSymmAnalyze).
51242          */
51243         if( isupper )
51244         {
51245             sparsecopytransposecrsbuf(a, &analysis->crsat, _state);
51246             result = spsymmanalyze(&analysis->crsat, facttype, permtype, &analysis->analysis, _state);
51247         }
51248         else
51249         {
51250             result = spsymmanalyze(a, facttype, permtype, &analysis->analysis, _state);
51251         }
51252     }
51253     return result;
51254 }
51255 
51256 
51257 /*************************************************************************
51258 Allows to control stability-improving  modification  strategy  for  sparse
51259 Cholesky/LDLT decompositions. Modified Cholesky is more  robust  than  its
51260 unmodified counterpart.
51261 
51262 This function is a part of the 'expert' sparse Cholesky API:
51263 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51264   matrix to be factorized into internal storage
51265 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51266   with lower bounds on pivot magnitudes and additional overflow safeguards
51267 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51268   precomputed symbolic analysis and internally stored matrix - and outputs
51269   result
51270 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51271   pattern into internal storage so  one  may  reuse  previously  allocated
51272   temporaries and previously performed symbolic analysis
51273 
51274 INPUT PARAMETERS:
51275     Analysis    -   symbolic analysis of the matrix structure
51276     ModStrategy -   modification type:
51277                     * 0 for traditional Cholesky/LDLT (Cholesky fails when
51278                       encounters nonpositive pivot, LDLT fails  when  zero
51279                       pivot   is  encountered,  no  stability  checks  for
51280                       overflows/underflows)
51281                     * 1 for modified Cholesky with additional checks:
51282                       * pivots less than ModParam0 are increased; (similar
51283                         sign-preserving procedure is applied during LDLT)
51284                       * if,  at  some  moment,  sum  of absolute values of
51285                         elements in column  J  will  become  greater  than
51286                         ModParam1, Cholesky/LDLT will treat it as  failure
51287                         and will stop immediately
51288     P0, P1, P2,P3 - modification parameters #0 #1, #2 and #3.
51289                     Params #2 and #3 are ignored in current version.
51290 
51291 OUTPUT PARAMETERS:
51292     Analysis    -   symbolic analysis of the matrix structure, new strategy
51293                     Results will be seen with next SparseCholeskyFactorize()
51294                     call.
51295 
51296   -- ALGLIB routine --
51297      20.09.2020
51298      Bochkanov Sergey
51299 *************************************************************************/
void sparsecholeskysetmodtype(sparsedecompositionanalysis* analysis,
     ae_int_t modstrategy,
     double p0,
     double p1,
     double p2,
     double p3,
     ae_state *_state)
{

    /*
     * Thin forwarder: the modification strategy and its parameters are
     * simply passed down to the internal SPSymm analysis object; they take
     * effect on the next SparseCholeskyFactorize() call.
     */
    spsymmsetmodificationstrategy(&analysis->analysis, modstrategy, p0, p1, p2, p3, _state);
}
51312 
51313 
51314 /*************************************************************************
51315 Sparse Cholesky decomposition: numerical analysis phase.
51316 
51317 This function is a part of the 'expert' sparse Cholesky API:
51318 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51319   matrix to be factorized into internal storage
51320 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51321   with lower bounds on pivot magnitudes and additional overflow safeguards
51322 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51323   precomputed symbolic analysis and internally stored matrix - and outputs
51324   result
51325 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51326   pattern into internal storage so  one  may  reuse  previously  allocated
51327   temporaries and previously performed symbolic analysis
51328 
51329 Depending on settings specified during SparseCholeskyAnalyze() call it may
51330 produce classic Cholesky or L*D*LT  decomposition  (with strictly diagonal
51331 D), without permutation or with performance-enhancing permutation P.
51332 
51333 NOTE: all internal processing is performed with lower triangular  matrices
51334       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
51335       triangular storage means  that  one  format  conversion  and/or  one
51336       transposition will be performed  internally  for  the  analysis  and
51337       factorization phases. Thus, highest  performance  is  achieved  when
51338       input is a lower triangular CRS matrix, and lower triangular  output
51339       is requested.
51340 
51341 NOTE: L*D*LT family of factorization may be used to  factorize  indefinite
51342       matrices. However, numerical stability is guaranteed ONLY for a class
51343       of quasi-definite matrices.
51344 
51345 INPUT PARAMETERS:
51346     Analysis    -   prior analysis with internally stored matrix which will
51347                     be factorized
51348     NeedUpper   -   whether upper triangular or lower triangular output is
51349                     needed
51350 
51351 OUTPUT PARAMETERS:
51352     A           -   Cholesky decomposition of A stored in lower triangular
51353                     CRS format, i.e. A=L*L' (or upper triangular CRS, with
51354                     A=U'*U, depending on NeedUpper parameter).
51355     D           -   array[N], diagonal factor. If no diagonal  factor  was
51356                     required during analysis  phase,  still  returned  but
51357                     filled with 1's
51358     P           -   array[N], pivots. Permutation matrix P is a product of
51359                     P(0)*P(1)*...*P(N-1), where P(i) is a  permutation  of
51360                     row/col I and P[I] (with P[I]>=I).
51361                     If no permutation was requested during analysis phase,
51362                     still returned but filled with identity permutation.
51363 
51364 The function returns True  when  factorization  resulted  in nondegenerate
51365 matrix. False is returned when factorization fails (Cholesky factorization
51366 of indefinite matrix) or LDLT factorization has exactly zero  elements  at
51367 the diagonal. In the latter case contents of A, D and P is undefined.
51368 
51369 The analysis object is not changed during  the  factorization.  Subsequent
51370 calls to SparseCholeskyFactorize() will result in same factorization being
51371 performed one more time.
51372 
51373   -- ALGLIB routine --
51374      20.09.2020
51375      Bochkanov Sergey
51376 *************************************************************************/
sparsecholeskyfactorize(sparsedecompositionanalysis * analysis,ae_bool needupper,sparsematrix * a,ae_vector * d,ae_vector * p,ae_state * _state)51377 ae_bool sparsecholeskyfactorize(sparsedecompositionanalysis* analysis,
51378      ae_bool needupper,
51379      sparsematrix* a,
51380      /* Real    */ ae_vector* d,
51381      /* Integer */ ae_vector* p,
51382      ae_state *_state)
51383 {
51384     ae_bool result;
51385 
51386     _sparsematrix_clear(a);
51387     ae_vector_clear(d);
51388     ae_vector_clear(p);
51389 
51390     if( needupper )
51391     {
51392         result = spsymmfactorize(&analysis->analysis, _state);
51393         if( !result )
51394         {
51395             return result;
51396         }
51397         spsymmextract(&analysis->analysis, &analysis->wrka, d, p, _state);
51398         sparsecopytransposecrsbuf(&analysis->wrka, a, _state);
51399     }
51400     else
51401     {
51402         result = spsymmfactorize(&analysis->analysis, _state);
51403         if( !result )
51404         {
51405             return result;
51406         }
51407         spsymmextract(&analysis->analysis, a, d, p, _state);
51408     }
51409     return result;
51410 }
51411 
51412 
51413 /*************************************************************************
51414 Sparse  Cholesky  decomposition:  update  internally  stored  matrix  with
51415 another one with exactly same sparsity pattern.
51416 
51417 This function is a part of the 'expert' sparse Cholesky API:
51418 * SparseCholeskyAnalyze(), that performs symbolic analysis phase and loads
51419   matrix to be factorized into internal storage
51420 * SparseCholeskySetModType(), that allows to  use  modified  Cholesky/LDLT
51421   with lower bounds on pivot magnitudes and additional overflow safeguards
51422 * SparseCholeskyFactorize(),  that performs  numeric  factorization  using
51423   precomputed symbolic analysis and internally stored matrix - and outputs
51424   result
51425 * SparseCholeskyReload(), that reloads one more matrix with same  sparsity
51426   pattern into internal storage so  one  may  reuse  previously  allocated
51427   temporaries and previously performed symbolic analysis
51428 
51429 This specific function replaces internally stored  numerical  values  with
51430 ones from another sparse matrix (but having exactly same sparsity  pattern
51431 as one that was used for initial SparseCholeskyAnalyze() call).
51432 
51433 NOTE: all internal processing is performed with lower triangular  matrices
51434       stored  in  CRS  format.  Any  other  storage  formats  and/or upper
51435       triangular storage means  that  one  format  conversion  and/or  one
51436       transposition will be performed  internally  for  the  analysis  and
51437       factorization phases. Thus, highest  performance  is  achieved  when
51438       input is a lower triangular CRS matrix.
51439 
51440 INPUT PARAMETERS:
51441     Analysis    -   analysis object
51442     A           -   sparse square matrix in any sparse storage format.  It
51443                     MUST have exactly same sparsity pattern as that of the
51444                     matrix that was passed to SparseCholeskyAnalyze().
51445                     Any difference (missing elements or additional elements)
51446                     may result in unpredictable and undefined  behavior  -
51447                     an algorithm may fail due to memory access violation.
51448     IsUpper     -   whether upper or lower  triangle  is  decomposed  (the
51449                     other one is ignored).
51450 
51451 OUTPUT PARAMETERS:
51452     Analysis    -   contains:
51453                     * symbolic analysis of the matrix structure which will
51454                       be used later to guide numerical factorization.
51455                     * specific numeric values loaded into internal  memory
51456                       waiting for the factorization to be performed
51457 
51458   -- ALGLIB routine --
51459      20.09.2020
51460      Bochkanov Sergey
51461 *************************************************************************/
sparsecholeskyreload(sparsedecompositionanalysis * analysis,sparsematrix * a,ae_bool isupper,ae_state * _state)51462 void sparsecholeskyreload(sparsedecompositionanalysis* analysis,
51463      sparsematrix* a,
51464      ae_bool isupper,
51465      ae_state *_state)
51466 {
51467 
51468 
51469     ae_assert(sparsegetnrows(a, _state)==sparsegetncols(a, _state), "SparseCholeskyReload: A is not square", _state);
51470     ae_assert(sparsegetnrows(a, _state)==analysis->n, "SparseCholeskyReload: size of A does not match that stored in Analysis", _state);
51471     if( !sparseiscrs(a, _state) )
51472     {
51473 
51474         /*
51475          * The matrix is stored in non-CRS format. First, we have to convert
51476          * it to CRS. Then we may need to transpose it in order to get lower
51477          * triangular one (as supported by SPSymmAnalyze).
51478          */
51479         sparsecopytocrs(a, &analysis->crsa, _state);
51480         if( isupper )
51481         {
51482             sparsecopytransposecrsbuf(&analysis->crsa, &analysis->crsat, _state);
51483             spsymmreload(&analysis->analysis, &analysis->crsat, _state);
51484         }
51485         else
51486         {
51487             spsymmreload(&analysis->analysis, &analysis->crsa, _state);
51488         }
51489     }
51490     else
51491     {
51492 
51493         /*
51494          * The matrix is stored in CRS format. However we may need to
51495          * transpose it in order to get lower triangular one (as supported
51496          * by SPSymmAnalyze).
51497          */
51498         if( isupper )
51499         {
51500             sparsecopytransposecrsbuf(a, &analysis->crsat, _state);
51501             spsymmreload(&analysis->analysis, &analysis->crsat, _state);
51502         }
51503         else
51504         {
51505             spsymmreload(&analysis->analysis, a, _state);
51506         }
51507     }
51508 }
51509 
51510 
rmatrixlup(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51511 void rmatrixlup(/* Real    */ ae_matrix* a,
51512      ae_int_t m,
51513      ae_int_t n,
51514      /* Integer */ ae_vector* pivots,
51515      ae_state *_state)
51516 {
51517     ae_frame _frame_block;
51518     ae_vector tmp;
51519     ae_int_t i;
51520     ae_int_t j;
51521     double mx;
51522     double v;
51523 
51524     ae_frame_make(_state, &_frame_block);
51525     memset(&tmp, 0, sizeof(tmp));
51526     ae_vector_clear(pivots);
51527     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
51528 
51529 
51530     /*
51531      * Internal LU decomposition subroutine.
51532      * Never call it directly.
51533      */
51534     ae_assert(m>0, "RMatrixLUP: incorrect M!", _state);
51535     ae_assert(n>0, "RMatrixLUP: incorrect N!", _state);
51536 
51537     /*
51538      * Scale matrix to avoid overflows,
51539      * decompose it, then scale back.
51540      */
51541     mx = (double)(0);
51542     for(i=0; i<=m-1; i++)
51543     {
51544         for(j=0; j<=n-1; j++)
51545         {
51546             mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
51547         }
51548     }
51549     if( ae_fp_neq(mx,(double)(0)) )
51550     {
51551         v = 1/mx;
51552         for(i=0; i<=m-1; i++)
51553         {
51554             ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
51555         }
51556     }
51557     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51558     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51559     rmatrixluprec(a, 0, m, n, pivots, &tmp, _state);
51560     if( ae_fp_neq(mx,(double)(0)) )
51561     {
51562         v = mx;
51563         for(i=0; i<=m-1; i++)
51564         {
51565             ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v);
51566         }
51567     }
51568     ae_frame_leave(_state);
51569 }
51570 
51571 
cmatrixlup(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51572 void cmatrixlup(/* Complex */ ae_matrix* a,
51573      ae_int_t m,
51574      ae_int_t n,
51575      /* Integer */ ae_vector* pivots,
51576      ae_state *_state)
51577 {
51578     ae_frame _frame_block;
51579     ae_vector tmp;
51580     ae_int_t i;
51581     ae_int_t j;
51582     double mx;
51583     double v;
51584 
51585     ae_frame_make(_state, &_frame_block);
51586     memset(&tmp, 0, sizeof(tmp));
51587     ae_vector_clear(pivots);
51588     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
51589 
51590 
51591     /*
51592      * Internal LU decomposition subroutine.
51593      * Never call it directly.
51594      */
51595     ae_assert(m>0, "CMatrixLUP: incorrect M!", _state);
51596     ae_assert(n>0, "CMatrixLUP: incorrect N!", _state);
51597 
51598     /*
51599      * Scale matrix to avoid overflows,
51600      * decompose it, then scale back.
51601      */
51602     mx = (double)(0);
51603     for(i=0; i<=m-1; i++)
51604     {
51605         for(j=0; j<=n-1; j++)
51606         {
51607             mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
51608         }
51609     }
51610     if( ae_fp_neq(mx,(double)(0)) )
51611     {
51612         v = 1/mx;
51613         for(i=0; i<=m-1; i++)
51614         {
51615             ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v);
51616         }
51617     }
51618     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51619     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51620     cmatrixluprec(a, 0, m, n, pivots, &tmp, _state);
51621     if( ae_fp_neq(mx,(double)(0)) )
51622     {
51623         v = mx;
51624         for(i=0; i<=m-1; i++)
51625         {
51626             ae_v_cmuld(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,ae_minint(i, n-1, _state)), v);
51627         }
51628     }
51629     ae_frame_leave(_state);
51630 }
51631 
51632 
rmatrixplu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51633 void rmatrixplu(/* Real    */ ae_matrix* a,
51634      ae_int_t m,
51635      ae_int_t n,
51636      /* Integer */ ae_vector* pivots,
51637      ae_state *_state)
51638 {
51639     ae_frame _frame_block;
51640     ae_vector tmp;
51641     ae_int_t i;
51642     ae_int_t j;
51643     double mx;
51644     double v;
51645 
51646     ae_frame_make(_state, &_frame_block);
51647     memset(&tmp, 0, sizeof(tmp));
51648     ae_vector_clear(pivots);
51649     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
51650 
51651 
51652     /*
51653      * Internal LU decomposition subroutine.
51654      * Never call it directly.
51655      */
51656     ae_assert(m>0, "RMatrixPLU: incorrect M!", _state);
51657     ae_assert(n>0, "RMatrixPLU: incorrect N!", _state);
51658     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51659     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51660 
51661     /*
51662      * Scale matrix to avoid overflows,
51663      * decompose it, then scale back.
51664      */
51665     mx = (double)(0);
51666     for(i=0; i<=m-1; i++)
51667     {
51668         for(j=0; j<=n-1; j++)
51669         {
51670             mx = ae_maxreal(mx, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
51671         }
51672     }
51673     if( ae_fp_neq(mx,(double)(0)) )
51674     {
51675         v = 1/mx;
51676         for(i=0; i<=m-1; i++)
51677         {
51678             ae_v_muld(&a->ptr.pp_double[i][0], 1, ae_v_len(0,n-1), v);
51679         }
51680     }
51681     rmatrixplurec(a, 0, m, n, pivots, &tmp, _state);
51682     if( ae_fp_neq(mx,(double)(0)) )
51683     {
51684         v = mx;
51685         for(i=0; i<=ae_minint(m, n, _state)-1; i++)
51686         {
51687             ae_v_muld(&a->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
51688         }
51689     }
51690     ae_frame_leave(_state);
51691 }
51692 
51693 
cmatrixplu(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)51694 void cmatrixplu(/* Complex */ ae_matrix* a,
51695      ae_int_t m,
51696      ae_int_t n,
51697      /* Integer */ ae_vector* pivots,
51698      ae_state *_state)
51699 {
51700     ae_frame _frame_block;
51701     ae_vector tmp;
51702     ae_int_t i;
51703     ae_int_t j;
51704     double mx;
51705     ae_complex v;
51706 
51707     ae_frame_make(_state, &_frame_block);
51708     memset(&tmp, 0, sizeof(tmp));
51709     ae_vector_clear(pivots);
51710     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
51711 
51712 
51713     /*
51714      * Internal LU decomposition subroutine.
51715      * Never call it directly.
51716      */
51717     ae_assert(m>0, "CMatrixPLU: incorrect M!", _state);
51718     ae_assert(n>0, "CMatrixPLU: incorrect N!", _state);
51719     ae_vector_set_length(&tmp, 2*ae_maxint(m, n, _state), _state);
51720     ae_vector_set_length(pivots, ae_minint(m, n, _state), _state);
51721 
51722     /*
51723      * Scale matrix to avoid overflows,
51724      * decompose it, then scale back.
51725      */
51726     mx = (double)(0);
51727     for(i=0; i<=m-1; i++)
51728     {
51729         for(j=0; j<=n-1; j++)
51730         {
51731             mx = ae_maxreal(mx, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
51732         }
51733     }
51734     if( ae_fp_neq(mx,(double)(0)) )
51735     {
51736         v = ae_complex_from_d(1/mx);
51737         for(i=0; i<=m-1; i++)
51738         {
51739             ae_v_cmulc(&a->ptr.pp_complex[i][0], 1, ae_v_len(0,n-1), v);
51740         }
51741     }
51742     cmatrixplurec(a, 0, m, n, pivots, &tmp, _state);
51743     if( ae_fp_neq(mx,(double)(0)) )
51744     {
51745         v = ae_complex_from_d(mx);
51746         for(i=0; i<=ae_minint(m, n, _state)-1; i++)
51747         {
51748             ae_v_cmulc(&a->ptr.pp_complex[i][i], 1, ae_v_len(i,n-1), v);
51749         }
51750     }
51751     ae_frame_leave(_state);
51752 }
51753 
51754 
51755 /*************************************************************************
51756 Advanced interface for SPDMatrixCholesky, performs no temporary allocations.
51757 
51758 INPUT PARAMETERS:
51759     A       -   matrix given by upper or lower triangle
51760     Offs    -   offset of diagonal block to decompose
51761     N       -   diagonal block size
51762     IsUpper -   what half is given
51763     Tmp     -   temporary array; allocated by function, if its size is too
51764                 small; can be reused on subsequent calls.
51765 
51766 OUTPUT PARAMETERS:
51767     A       -   upper (or lower) triangle contains Cholesky decomposition
51768 
51769 RESULT:
51770     True, on success
51771     False, on failure
51772 
51773   -- ALGLIB routine --
51774      15.12.2009
51775      Bochkanov Sergey
51776 *************************************************************************/
spdmatrixcholeskyrec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)51777 ae_bool spdmatrixcholeskyrec(/* Real    */ ae_matrix* a,
51778      ae_int_t offs,
51779      ae_int_t n,
51780      ae_bool isupper,
51781      /* Real    */ ae_vector* tmp,
51782      ae_state *_state)
51783 {
51784     ae_int_t n1;
51785     ae_int_t n2;
51786     ae_int_t tsa;
51787     ae_int_t tsb;
51788     ae_bool result;
51789 
51790 
51791     tsa = matrixtilesizea(_state);
51792     tsb = matrixtilesizeb(_state);
51793 
51794     /*
51795      * Allocate temporaries
51796      */
51797     if( tmp->cnt<2*n )
51798     {
51799         ae_vector_set_length(tmp, 2*n, _state);
51800     }
51801 
51802     /*
51803      * Basecases
51804      */
51805     if( n<1 )
51806     {
51807         result = ae_false;
51808         return result;
51809     }
51810     if( n==1 )
51811     {
51812         if( ae_fp_greater(a->ptr.pp_double[offs][offs],(double)(0)) )
51813         {
51814             a->ptr.pp_double[offs][offs] = ae_sqrt(a->ptr.pp_double[offs][offs], _state);
51815             result = ae_true;
51816         }
51817         else
51818         {
51819             result = ae_false;
51820         }
51821         return result;
51822     }
51823     if( n<=tsb )
51824     {
51825         if( spdmatrixcholeskymkl(a, offs, n, isupper, &result, _state) )
51826         {
51827             return result;
51828         }
51829     }
51830     if( n<=tsa )
51831     {
51832         result = trfac_spdmatrixcholesky2(a, offs, n, isupper, tmp, _state);
51833         return result;
51834     }
51835 
51836     /*
51837      * Split task into smaller ones
51838      */
51839     if( n>tsb )
51840     {
51841 
51842         /*
51843          * Split leading B-sized block from the beginning (block-matrix approach)
51844          */
51845         n1 = tsb;
51846         n2 = n-n1;
51847     }
51848     else
51849     {
51850 
51851         /*
51852          * Smaller than B-size, perform cache-oblivious split
51853          */
51854         tiledsplit(n, tsa, &n1, &n2, _state);
51855     }
51856     result = spdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state);
51857     if( !result )
51858     {
51859         return result;
51860     }
51861     if( n2>0 )
51862     {
51863         if( isupper )
51864         {
51865             rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 1, a, offs, offs+n1, _state);
51866             rmatrixsyrk(n2, n1, -1.0, a, offs, offs+n1, 1, 1.0, a, offs+n1, offs+n1, isupper, _state);
51867         }
51868         else
51869         {
51870             rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 1, a, offs+n1, offs, _state);
51871             rmatrixsyrk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state);
51872         }
51873         result = spdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state);
51874         if( !result )
51875         {
51876             return result;
51877         }
51878     }
51879     return result;
51880 }
51881 
51882 
51883 /*************************************************************************
51884 Recursive computational subroutine for HPDMatrixCholesky
51885 
51886   -- ALGLIB routine --
51887      15.12.2009
51888      Bochkanov Sergey
51889 *************************************************************************/
trfac_hpdmatrixcholeskyrec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)51890 static ae_bool trfac_hpdmatrixcholeskyrec(/* Complex */ ae_matrix* a,
51891      ae_int_t offs,
51892      ae_int_t n,
51893      ae_bool isupper,
51894      /* Complex */ ae_vector* tmp,
51895      ae_state *_state)
51896 {
51897     ae_int_t n1;
51898     ae_int_t n2;
51899     ae_int_t tsa;
51900     ae_int_t tsb;
51901     ae_bool result;
51902 
51903 
51904     tsa = matrixtilesizea(_state)/2;
51905     tsb = matrixtilesizeb(_state);
51906 
51907     /*
51908      * check N
51909      */
51910     if( n<1 )
51911     {
51912         result = ae_false;
51913         return result;
51914     }
51915 
51916     /*
51917      * Prepare buffer
51918      */
51919     if( tmp->cnt<2*n )
51920     {
51921         ae_vector_set_length(tmp, 2*n, _state);
51922     }
51923 
51924     /*
51925      * Basecases
51926      *
51927      * NOTE: we do not use MKL for basecases because their price is only
51928      *       minor part of overall running time for N>256.
51929      */
51930     if( n==1 )
51931     {
51932         if( ae_fp_greater(a->ptr.pp_complex[offs][offs].x,(double)(0)) )
51933         {
51934             a->ptr.pp_complex[offs][offs] = ae_complex_from_d(ae_sqrt(a->ptr.pp_complex[offs][offs].x, _state));
51935             result = ae_true;
51936         }
51937         else
51938         {
51939             result = ae_false;
51940         }
51941         return result;
51942     }
51943     if( n<=tsa )
51944     {
51945         result = trfac_hpdmatrixcholesky2(a, offs, n, isupper, tmp, _state);
51946         return result;
51947     }
51948 
51949     /*
51950      * Split task into smaller ones
51951      */
51952     if( n>tsb )
51953     {
51954 
51955         /*
51956          * Split leading B-sized block from the beginning (block-matrix approach)
51957          */
51958         n1 = tsb;
51959         n2 = n-n1;
51960     }
51961     else
51962     {
51963 
51964         /*
51965          * Smaller than B-size, perform cache-oblivious split
51966          */
51967         tiledsplit(n, tsa, &n1, &n2, _state);
51968     }
51969     result = trfac_hpdmatrixcholeskyrec(a, offs, n1, isupper, tmp, _state);
51970     if( !result )
51971     {
51972         return result;
51973     }
51974     if( n2>0 )
51975     {
51976         if( isupper )
51977         {
51978             cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 2, a, offs, offs+n1, _state);
51979             cmatrixherk(n2, n1, -1.0, a, offs, offs+n1, 2, 1.0, a, offs+n1, offs+n1, isupper, _state);
51980         }
51981         else
51982         {
51983             cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 2, a, offs+n1, offs, _state);
51984             cmatrixherk(n2, n1, -1.0, a, offs+n1, offs, 0, 1.0, a, offs+n1, offs+n1, isupper, _state);
51985         }
51986         result = trfac_hpdmatrixcholeskyrec(a, offs+n1, n2, isupper, tmp, _state);
51987         if( !result )
51988         {
51989             return result;
51990         }
51991     }
51992     return result;
51993 }
51994 
51995 
51996 /*************************************************************************
51997 Level-2 Hermitian Cholesky subroutine.
51998 
51999   -- LAPACK routine (version 3.0) --
52000      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
52001      Courant Institute, Argonne National Lab, and Rice University
52002      February 29, 1992
52003 *************************************************************************/
static ae_bool trfac_hpdmatrixcholesky2(/* Complex */ ae_matrix* aaa,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     /* Complex */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double ajj;
    ae_complex v;
    double r;
    ae_bool result;


    /*
     * Works in-place on the N*N diagonal block of AAA starting at
     * (Offs,Offs). Tmp is scratch storage: elements [0,j) hold a copy of
     * the current row/column, elements [n,n+...) receive the
     * matrix-vector product (so Tmp is assumed to have length >= 2*N --
     * TODO confirm against callers).
     * Returns False on negative N or a non-positive pivot.
     */
    result = ae_true;
    if( n<0 )
    {
        result = ae_false;
        return result;
    }

    /*
     * Quick return if possible
     */
    if( n==0 )
    {
        return result;
    }
    if( isupper )
    {

        /*
         * Compute the Cholesky factorization A = U'*U.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute U(J,J) and test for non-positive-definiteness.
             * Only the real part of the diagonal is used; on failure the
             * offending value is stored back and False is returned.
             */
            v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "N", ae_v_len(offs,offs+j-1));
            ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);

            /*
             * Compute elements J+1:N-1 of row J.
             */
            if( j<n-1 )
            {
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = -conj(column J above the diagonal);
                     * the product with the trailing columns lands in
                     * Tmp[n..] and is added to row J.
                     */
                    ae_v_cmoveneg(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs][offs+j], aaa->stride, "Conj", ae_v_len(0,j-1));
                    cmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state);
                    ae_v_cadd(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, &tmp->ptr.p_complex[n], 1, "N", ae_v_len(offs+j+1,offs+n-1));
                }
                r = 1/ajj;
                ae_v_cmuld(&aaa->ptr.pp_complex[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
            }
        }
    }
    else
    {

        /*
         * Compute the Cholesky factorization A = L*L'.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute L(J+1,J+1) and test for non-positive-definiteness.
             * Only the real part of the diagonal is used; on failure the
             * offending value is stored back and False is returned.
             */
            v = ae_v_cdotproduct(&aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", &aaa->ptr.pp_complex[offs+j][offs], 1, "N", ae_v_len(offs,offs+j-1));
            ajj = ae_c_sub(aaa->ptr.pp_complex[offs+j][offs+j],v).x;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_complex[offs+j][offs+j] = ae_complex_from_d(ajj);

            /*
             * Compute elements J+1:N of column J.
             */
            if( j<n-1 )
            {
                r = 1/ajj;
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = conj(row J left of the diagonal); the
                     * product with the trailing rows lands in Tmp[n..]
                     * and is subtracted from column J before scaling.
                     */
                    ae_v_cmove(&tmp->ptr.p_complex[0], 1, &aaa->ptr.pp_complex[offs+j][offs], 1, "Conj", ae_v_len(0,j-1));
                    cmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state);
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_mul_d(ae_c_sub(aaa->ptr.pp_complex[offs+j+1+i][offs+j],tmp->ptr.p_complex[n+i]),r);
                    }
                }
                else
                {
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_complex[offs+j+1+i][offs+j] = ae_c_mul_d(aaa->ptr.pp_complex[offs+j+1+i][offs+j],r);
                    }
                }
            }
        }
    }
    return result;
}
52122 
52123 
52124 /*************************************************************************
52125 Level-2 Cholesky subroutine
52126 
52127   -- LAPACK routine (version 3.0) --
52128      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
52129      Courant Institute, Argonne National Lab, and Rice University
52130      February 29, 1992
52131 *************************************************************************/
static ae_bool trfac_spdmatrixcholesky2(/* Real    */ ae_matrix* aaa,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     /* Real    */ ae_vector* tmp,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double ajj;
    double v;
    double r;
    ae_bool result;


    /*
     * Works in-place on the N*N diagonal block of AAA starting at
     * (Offs,Offs). Tmp is scratch storage: elements [0,j) hold a copy of
     * the current row/column, elements [n,n+...) receive the
     * matrix-vector product (so Tmp is assumed to have length >= 2*N --
     * TODO confirm against callers).
     * Returns False on negative N or a non-positive pivot.
     */
    result = ae_true;
    if( n<0 )
    {
        result = ae_false;
        return result;
    }

    /*
     * Quick return if possible
     */
    if( n==0 )
    {
        return result;
    }
    if( isupper )
    {

        /*
         * Compute the Cholesky factorization A = U'*U.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute U(J,J) and test for non-positive-definiteness.
             * On failure the offending value is stored back into the
             * diagonal and False is returned.
             */
            v = ae_v_dotproduct(&aaa->ptr.pp_double[offs][offs+j], aaa->stride, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(offs,offs+j-1));
            ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_double[offs+j][offs+j] = ajj;
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_double[offs+j][offs+j] = ajj;

            /*
             * Compute elements J+1:N-1 of row J.
             */
            if( j<n-1 )
            {
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = -(column J above the diagonal); the
                     * product with the trailing columns lands in Tmp[n..]
                     * and is added to row J.
                     */
                    ae_v_moveneg(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs][offs+j], aaa->stride, ae_v_len(0,j-1));
                    rmatrixmv(n-j-1, j, aaa, offs, offs+j+1, 1, tmp, 0, tmp, n, _state);
                    ae_v_add(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, &tmp->ptr.p_double[n], 1, ae_v_len(offs+j+1,offs+n-1));
                }
                r = 1/ajj;
                ae_v_muld(&aaa->ptr.pp_double[offs+j][offs+j+1], 1, ae_v_len(offs+j+1,offs+n-1), r);
            }
        }
    }
    else
    {

        /*
         * Compute the Cholesky factorization A = L*L'.
         */
        for(j=0; j<=n-1; j++)
        {

            /*
             * Compute L(J+1,J+1) and test for non-positive-definiteness.
             * On failure the offending value is stored back into the
             * diagonal and False is returned.
             */
            v = ae_v_dotproduct(&aaa->ptr.pp_double[offs+j][offs], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(offs,offs+j-1));
            ajj = aaa->ptr.pp_double[offs+j][offs+j]-v;
            if( ae_fp_less_eq(ajj,(double)(0)) )
            {
                aaa->ptr.pp_double[offs+j][offs+j] = ajj;
                result = ae_false;
                return result;
            }
            ajj = ae_sqrt(ajj, _state);
            aaa->ptr.pp_double[offs+j][offs+j] = ajj;

            /*
             * Compute elements J+1:N of column J.
             */
            if( j<n-1 )
            {
                r = 1/ajj;
                if( j>0 )
                {

                    /*
                     * Tmp[0..j-1] = row J left of the diagonal; the
                     * product with the trailing rows lands in Tmp[n..]
                     * and is subtracted from column J before scaling.
                     */
                    ae_v_move(&tmp->ptr.p_double[0], 1, &aaa->ptr.pp_double[offs+j][offs], 1, ae_v_len(0,j-1));
                    rmatrixmv(n-j-1, j, aaa, offs+j+1, offs, 0, tmp, 0, tmp, n, _state);
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_double[offs+j+1+i][offs+j] = (aaa->ptr.pp_double[offs+j+1+i][offs+j]-tmp->ptr.p_double[n+i])*r;
                    }
                }
                else
                {
                    for(i=0; i<=n-j-2; i++)
                    {
                        aaa->ptr.pp_double[offs+j+1+i][offs+j] = aaa->ptr.pp_double[offs+j+1+i][offs+j]*r;
                    }
                }
            }
        }
    }
    return result;
}
52250 
52251 
/*
 * Constructs a sparsedecompositionanalysis instance in its default state:
 * each nested structure is initialized through its own _init routine.
 * Scalar fields are left as-is (they are set by the analysis code itself).
 */
void _sparsedecompositionanalysis_init(void* _p, ae_state *_state, ae_bool make_automatic)
{
    sparsedecompositionanalysis *obj = (sparsedecompositionanalysis*)_p;
    ae_touch_ptr((void*)obj);
    _spcholanalysis_init(&obj->analysis, _state, make_automatic);
    _sparsematrix_init(&obj->wrka, _state, make_automatic);
    _sparsematrix_init(&obj->wrkat, _state, make_automatic);
    _sparsematrix_init(&obj->crsa, _state, make_automatic);
    _sparsematrix_init(&obj->crsat, _state, make_automatic);
}
52262 
52263 
/*
 * Copy-constructs a sparsedecompositionanalysis from an existing source:
 * scalar fields are assigned directly, nested structures are deep-copied
 * through their respective _init_copy routines.
 */
void _sparsedecompositionanalysis_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
{
    sparsedecompositionanalysis *to = (sparsedecompositionanalysis*)_dst;
    sparsedecompositionanalysis *from = (sparsedecompositionanalysis*)_src;
    to->n = from->n;
    to->facttype = from->facttype;
    to->permtype = from->permtype;
    _spcholanalysis_init_copy(&to->analysis, &from->analysis, _state, make_automatic);
    _sparsematrix_init_copy(&to->wrka, &from->wrka, _state, make_automatic);
    _sparsematrix_init_copy(&to->wrkat, &from->wrkat, _state, make_automatic);
    _sparsematrix_init_copy(&to->crsa, &from->crsa, _state, make_automatic);
    _sparsematrix_init_copy(&to->crsat, &from->crsat, _state, make_automatic);
}
52277 
52278 
_sparsedecompositionanalysis_clear(void * _p)52279 void _sparsedecompositionanalysis_clear(void* _p)
52280 {
52281     sparsedecompositionanalysis *p = (sparsedecompositionanalysis*)_p;
52282     ae_touch_ptr((void*)p);
52283     _spcholanalysis_clear(&p->analysis);
52284     _sparsematrix_clear(&p->wrka);
52285     _sparsematrix_clear(&p->wrkat);
52286     _sparsematrix_clear(&p->crsa);
52287     _sparsematrix_clear(&p->crsat);
52288 }
52289 
52290 
_sparsedecompositionanalysis_destroy(void * _p)52291 void _sparsedecompositionanalysis_destroy(void* _p)
52292 {
52293     sparsedecompositionanalysis *p = (sparsedecompositionanalysis*)_p;
52294     ae_touch_ptr((void*)p);
52295     _spcholanalysis_destroy(&p->analysis);
52296     _sparsematrix_destroy(&p->wrka);
52297     _sparsematrix_destroy(&p->wrkat);
52298     _sparsematrix_destroy(&p->crsa);
52299     _sparsematrix_destroy(&p->crsat);
52300 }
52301 
52302 
52303 #endif
52304 #if defined(AE_COMPILE_BDSVD) || !defined(AE_PARTIAL_BUILD)
52305 
52306 
52307 /*************************************************************************
52308 Singular value decomposition of a bidiagonal matrix (extended algorithm)
52309 
52310 COMMERCIAL EDITION OF ALGLIB:
52311 
52312   ! Commercial version of ALGLIB includes one  important  improvement   of
52313   ! this function, which can be used from C++ and C#:
52314   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
52315   !
52316   ! Intel MKL gives approximately constant  (with  respect  to  number  of
52317   ! worker threads) acceleration factor which depends on CPU  being  used,
52318   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
52319   ! comparison.
52320   !
52321   ! Generally, commercial ALGLIB is several times faster than  open-source
52322   ! generic C edition, and many times faster than open-source C# edition.
52323   !
52324   ! Multithreaded acceleration is NOT supported for this function.
52325   !
52326   ! We recommend you to read 'Working with commercial version' section  of
52327   ! ALGLIB Reference Manual in order to find out how to  use  performance-
52328   ! related features provided by commercial edition of ALGLIB.
52329 
52330 The algorithm performs the singular value decomposition  of  a  bidiagonal
52331 matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and  P -
52332 orthogonal matrices, S - diagonal matrix with non-negative elements on the
52333 main diagonal, in descending order.
52334 
52335 The  algorithm  finds  singular  values.  In  addition,  the algorithm can
52336 calculate  matrices  Q  and P (more precisely, not the matrices, but their
product  with  given  matrices U and VT - U*Q and (P^T)*VT).  Of   course,
52338 matrices U and VT can be of any type, including identity. Furthermore, the
algorithm can calculate Q'*C (this product is calculated more  efficiently
52340 than U*Q,  because  this calculation operates with rows instead  of matrix
52341 columns).
52342 
52343 The feature of the algorithm is its ability to find  all  singular  values
52344 including those which are arbitrarily close to 0  with  relative  accuracy
52345 close to  machine precision. If the parameter IsFractionalAccuracyRequired
52346 is set to True, all singular values will have high relative accuracy close
52347 to machine precision. If the parameter is set to False, only  the  biggest
52348 singular value will have relative accuracy  close  to  machine  precision.
52349 The absolute error of other singular values is equal to the absolute error
52350 of the biggest singular value.
52351 
52352 Input parameters:
52353     D       -   main diagonal of matrix B.
52354                 Array whose index ranges within [0..N-1].
52355     E       -   superdiagonal (or subdiagonal) of matrix B.
52356                 Array whose index ranges within [0..N-2].
52357     N       -   size of matrix B.
52358     IsUpper -   True, if the matrix is upper bidiagonal.
52359     IsFractionalAccuracyRequired -
52360                 THIS PARAMETER IS IGNORED SINCE ALGLIB 3.5.0
52361                 SINGULAR VALUES ARE ALWAYS SEARCHED WITH HIGH ACCURACY.
52362     U       -   matrix to be multiplied by Q.
52363                 Array whose indexes range within [0..NRU-1, 0..N-1].
52364                 The matrix can be bigger, in that case only the  submatrix
52365                 [0..NRU-1, 0..N-1] will be multiplied by Q.
52366     NRU     -   number of rows in matrix U.
52367     C       -   matrix to be multiplied by Q'.
52368                 Array whose indexes range within [0..N-1, 0..NCC-1].
52369                 The matrix can be bigger, in that case only the  submatrix
52370                 [0..N-1, 0..NCC-1] will be multiplied by Q'.
52371     NCC     -   number of columns in matrix C.
52372     VT      -   matrix to be multiplied by P^T.
52373                 Array whose indexes range within [0..N-1, 0..NCVT-1].
52374                 The matrix can be bigger, in that case only the  submatrix
52375                 [0..N-1, 0..NCVT-1] will be multiplied by P^T.
52376     NCVT    -   number of columns in matrix VT.
52377 
52378 Output parameters:
52379     D       -   singular values of matrix B in descending order.
52380     U       -   if NRU>0, contains matrix U*Q.
52381     VT      -   if NCVT>0, contains matrix (P^T)*VT.
52382     C       -   if NCC>0, contains matrix Q'*C.
52383 
52384 Result:
52385     True, if the algorithm has converged.
52386     False, if the algorithm hasn't converged (rare case).
52387 
NOTE: multiplication U*Q is performed by means of transposition to an internal
      buffer, multiplication and backward transposition. It helps to avoid
      costly columnwise operations and speeds up the algorithm.
52391 
52392 Additional information:
52393     The type of convergence is controlled by the internal  parameter  TOL.
52394     If the parameter is greater than 0, the singular values will have
52395     relative accuracy TOL. If TOL<0, the singular values will have
52396     absolute accuracy ABS(TOL)*norm(B).
    By default, |TOL| falls between 10*Epsilon and 100*Epsilon,
52398     where Epsilon is the machine precision. It is not  recommended  to  use
52399     TOL less than 10*Epsilon since this will  considerably  slow  down  the
52400     algorithm and may not lead to error decreasing.
52401 
52402 History:
52403     * 31 March, 2007.
52404         changed MAXITR from 6 to 12.
52405 
52406   -- LAPACK routine (version 3.0) --
52407      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
52408      Courant Institute, Argonne National Lab, and Rice University
52409      October 31, 1999.
52410 *************************************************************************/
ae_bool rmatrixbdsvd(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isfractionalaccuracyrequired,
     /* Real    */ ae_matrix* u,
     ae_int_t nru,
     /* Real    */ ae_matrix* c,
     ae_int_t ncc,
     /* Real    */ ae_matrix* vt,
     ae_int_t ncvt,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector _e;
    ae_int_t k;
    ae_vector en;
    ae_vector d1;
    ae_vector e1;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_e, 0, sizeof(_e));
    memset(&en, 0, sizeof(en));
    memset(&d1, 0, sizeof(d1));
    memset(&e1, 0, sizeof(e1));
    ae_vector_init_copy(&_e, e, _state, ae_true);
    e = &_e;
    ae_vector_init(&en, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&d1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e1, 0, DT_REAL, _state, ae_true);

    result = ae_false;

    /*
     * First chance: hand the problem to the MKL-backed kernel, which
     * expects an N-element off-diagonal buffer (trailing element zeroed).
     * NOTE(review): assumes N>=1, consistent with the rest of this unit.
     */
    ae_vector_set_length(&en, n, _state);
    k = 0;
    while( k<n-1 )
    {
        en.ptr.p_double[k] = e->ptr.p_double[k];
        k = k+1;
    }
    en.ptr.p_double[n-1] = 0.0;
    if( rmatrixbdsvdmkl(d, &en, n, isupper, u, nru, c, ncc, vt, ncvt, &result, _state) )
    {
        ae_frame_leave(_state);
        return result;
    }

    /*
     * MKL kernel declined the problem: repack D/E into 1-based buffers
     * and run the generic ALGLIB implementation, then copy the singular
     * values back into the caller's 0-based array D.
     */
    ae_vector_set_length(&d1, n+1, _state);
    for(k=0; k<=n-1; k++)
    {
        d1.ptr.p_double[k+1] = d->ptr.p_double[k];
    }
    if( n>1 )
    {
        ae_vector_set_length(&e1, n-1+1, _state);
        for(k=0; k<=n-2; k++)
        {
            e1.ptr.p_double[k+1] = e->ptr.p_double[k];
        }
    }
    result = bdsvd_bidiagonalsvddecompositioninternal(&d1, &e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt, _state);
    for(k=0; k<=n-1; k++)
    {
        d->ptr.p_double[k] = d1.ptr.p_double[k+1];
    }
    ae_frame_leave(_state);
    return result;
}
52475 
52476 
/*
 * Wrapper around the internal bidiagonal SVD driver that uses unit start
 * indices for U, C and VT. The off-diagonal E is copied first so that the
 * caller's array is left intact (the internal routine modifies it).
 */
ae_bool bidiagonalsvddecomposition(/* Real    */ ae_vector* d,
     /* Real    */ ae_vector* e,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isfractionalaccuracyrequired,
     /* Real    */ ae_matrix* u,
     ae_int_t nru,
     /* Real    */ ae_matrix* c,
     ae_int_t ncc,
     /* Real    */ ae_matrix* vt,
     ae_int_t ncvt,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector ecopy;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&ecopy, 0, sizeof(ecopy));
    ae_vector_init_copy(&ecopy, e, _state, ae_true);

    result = bdsvd_bidiagonalsvddecompositioninternal(d, &ecopy, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt, _state);
    ae_frame_leave(_state);
    return result;
}
52503 
52504 
52505 /*************************************************************************
52506 Internal working subroutine for bidiagonal decomposition
52507 *************************************************************************/
bdsvd_bidiagonalsvddecompositioninternal(ae_vector * d,ae_vector * e,ae_int_t n,ae_bool isupper,ae_bool isfractionalaccuracyrequired,ae_matrix * uu,ae_int_t ustart,ae_int_t nru,ae_matrix * c,ae_int_t cstart,ae_int_t ncc,ae_matrix * vt,ae_int_t vstart,ae_int_t ncvt,ae_state * _state)52508 static ae_bool bdsvd_bidiagonalsvddecompositioninternal(/* Real    */ ae_vector* d,
52509      /* Real    */ ae_vector* e,
52510      ae_int_t n,
52511      ae_bool isupper,
52512      ae_bool isfractionalaccuracyrequired,
52513      /* Real    */ ae_matrix* uu,
52514      ae_int_t ustart,
52515      ae_int_t nru,
52516      /* Real    */ ae_matrix* c,
52517      ae_int_t cstart,
52518      ae_int_t ncc,
52519      /* Real    */ ae_matrix* vt,
52520      ae_int_t vstart,
52521      ae_int_t ncvt,
52522      ae_state *_state)
52523 {
52524     ae_frame _frame_block;
52525     ae_vector _e;
52526     ae_int_t i;
52527     ae_int_t idir;
52528     ae_int_t isub;
52529     ae_int_t iter;
52530     ae_int_t j;
52531     ae_int_t ll;
52532     ae_int_t lll;
52533     ae_int_t m;
52534     ae_int_t maxit;
52535     ae_int_t oldll;
52536     ae_int_t oldm;
52537     double abse;
52538     double abss;
52539     double cosl;
52540     double cosr;
52541     double cs;
52542     double eps;
52543     double f;
52544     double g;
52545     double h;
52546     double mu;
52547     double oldcs;
52548     double oldsn;
52549     double r;
52550     double shift;
52551     double sigmn;
52552     double sigmx;
52553     double sinl;
52554     double sinr;
52555     double sll;
52556     double smax;
52557     double smin;
52558     double sminl;
52559     double sminoa;
52560     double sn;
52561     double thresh;
52562     double tol;
52563     double tolmul;
52564     double unfl;
52565     ae_vector work0;
52566     ae_vector work1;
52567     ae_vector work2;
52568     ae_vector work3;
52569     ae_int_t maxitr;
52570     ae_bool matrixsplitflag;
52571     ae_bool iterflag;
52572     ae_vector utemp;
52573     ae_vector vttemp;
52574     ae_vector ctemp;
52575     ae_vector etemp;
52576     ae_matrix ut;
52577     ae_bool fwddir;
52578     double tmp;
52579     ae_int_t mm1;
52580     ae_int_t mm0;
52581     ae_bool bchangedir;
52582     ae_int_t uend;
52583     ae_int_t cend;
52584     ae_int_t vend;
52585     ae_bool result;
52586 
52587     ae_frame_make(_state, &_frame_block);
52588     memset(&_e, 0, sizeof(_e));
52589     memset(&work0, 0, sizeof(work0));
52590     memset(&work1, 0, sizeof(work1));
52591     memset(&work2, 0, sizeof(work2));
52592     memset(&work3, 0, sizeof(work3));
52593     memset(&utemp, 0, sizeof(utemp));
52594     memset(&vttemp, 0, sizeof(vttemp));
52595     memset(&ctemp, 0, sizeof(ctemp));
52596     memset(&etemp, 0, sizeof(etemp));
52597     memset(&ut, 0, sizeof(ut));
52598     ae_vector_init_copy(&_e, e, _state, ae_true);
52599     e = &_e;
52600     ae_vector_init(&work0, 0, DT_REAL, _state, ae_true);
52601     ae_vector_init(&work1, 0, DT_REAL, _state, ae_true);
52602     ae_vector_init(&work2, 0, DT_REAL, _state, ae_true);
52603     ae_vector_init(&work3, 0, DT_REAL, _state, ae_true);
52604     ae_vector_init(&utemp, 0, DT_REAL, _state, ae_true);
52605     ae_vector_init(&vttemp, 0, DT_REAL, _state, ae_true);
52606     ae_vector_init(&ctemp, 0, DT_REAL, _state, ae_true);
52607     ae_vector_init(&etemp, 0, DT_REAL, _state, ae_true);
52608     ae_matrix_init(&ut, 0, 0, DT_REAL, _state, ae_true);
52609 
52610     result = ae_true;
52611     if( n==0 )
52612     {
52613         ae_frame_leave(_state);
52614         return result;
52615     }
52616     if( n==1 )
52617     {
52618         if( ae_fp_less(d->ptr.p_double[1],(double)(0)) )
52619         {
52620             d->ptr.p_double[1] = -d->ptr.p_double[1];
52621             if( ncvt>0 )
52622             {
52623                 ae_v_muld(&vt->ptr.pp_double[vstart][vstart], 1, ae_v_len(vstart,vstart+ncvt-1), -1);
52624             }
52625         }
52626         ae_frame_leave(_state);
52627         return result;
52628     }
52629 
52630     /*
52631      * these initializers are not really necessary,
52632      * but without them compiler complains about uninitialized locals
52633      */
52634     ll = 0;
52635     oldsn = (double)(0);
52636 
52637     /*
52638      * init
52639      */
52640     ae_vector_set_length(&work0, n-1+1, _state);
52641     ae_vector_set_length(&work1, n-1+1, _state);
52642     ae_vector_set_length(&work2, n-1+1, _state);
52643     ae_vector_set_length(&work3, n-1+1, _state);
52644     uend = ustart+ae_maxint(nru-1, 0, _state);
52645     vend = vstart+ae_maxint(ncvt-1, 0, _state);
52646     cend = cstart+ae_maxint(ncc-1, 0, _state);
52647     ae_vector_set_length(&utemp, uend+1, _state);
52648     ae_vector_set_length(&vttemp, vend+1, _state);
52649     ae_vector_set_length(&ctemp, cend+1, _state);
52650     maxitr = 12;
52651     fwddir = ae_true;
52652     if( nru>0 )
52653     {
52654         ae_matrix_set_length(&ut, ustart+n, ustart+nru, _state);
52655         rmatrixtranspose(nru, n, uu, ustart, ustart, &ut, ustart, ustart, _state);
52656     }
52657 
52658     /*
52659      * resize E from N-1 to N
52660      */
52661     ae_vector_set_length(&etemp, n+1, _state);
52662     for(i=1; i<=n-1; i++)
52663     {
52664         etemp.ptr.p_double[i] = e->ptr.p_double[i];
52665     }
52666     ae_vector_set_length(e, n+1, _state);
52667     for(i=1; i<=n-1; i++)
52668     {
52669         e->ptr.p_double[i] = etemp.ptr.p_double[i];
52670     }
52671     e->ptr.p_double[n] = (double)(0);
52672     idir = 0;
52673 
52674     /*
52675      * Get machine constants
52676      */
52677     eps = ae_machineepsilon;
52678     unfl = ae_minrealnumber;
52679 
52680     /*
52681      * If matrix lower bidiagonal, rotate to be upper bidiagonal
52682      * by applying Givens rotations on the left
52683      */
52684     if( !isupper )
52685     {
52686         for(i=1; i<=n-1; i++)
52687         {
52688             generaterotation(d->ptr.p_double[i], e->ptr.p_double[i], &cs, &sn, &r, _state);
52689             d->ptr.p_double[i] = r;
52690             e->ptr.p_double[i] = sn*d->ptr.p_double[i+1];
52691             d->ptr.p_double[i+1] = cs*d->ptr.p_double[i+1];
52692             work0.ptr.p_double[i] = cs;
52693             work1.ptr.p_double[i] = sn;
52694         }
52695 
52696         /*
52697          * Update singular vectors if desired
52698          */
52699         if( nru>0 )
52700         {
52701             applyrotationsfromtheleft(fwddir, 1+ustart-1, n+ustart-1, ustart, uend, &work0, &work1, &ut, &utemp, _state);
52702         }
52703         if( ncc>0 )
52704         {
52705             applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
52706         }
52707     }
52708 
52709     /*
52710      * Compute singular values to relative accuracy TOL
52711      * (By setting TOL to be negative, algorithm will compute
52712      * singular values to absolute accuracy ABS(TOL)*norm(input matrix))
52713      */
52714     tolmul = ae_maxreal((double)(10), ae_minreal((double)(100), ae_pow(eps, -0.125, _state), _state), _state);
52715     tol = tolmul*eps;
52716 
52717     /*
52718      * Compute approximate maximum, minimum singular values
52719      */
52720     smax = (double)(0);
52721     for(i=1; i<=n; i++)
52722     {
52723         smax = ae_maxreal(smax, ae_fabs(d->ptr.p_double[i], _state), _state);
52724     }
52725     for(i=1; i<=n-1; i++)
52726     {
52727         smax = ae_maxreal(smax, ae_fabs(e->ptr.p_double[i], _state), _state);
52728     }
52729     sminl = (double)(0);
52730     if( ae_fp_greater_eq(tol,(double)(0)) )
52731     {
52732 
52733         /*
52734          * Relative accuracy desired
52735          */
52736         sminoa = ae_fabs(d->ptr.p_double[1], _state);
52737         if( ae_fp_neq(sminoa,(double)(0)) )
52738         {
52739             mu = sminoa;
52740             for(i=2; i<=n; i++)
52741             {
52742                 mu = ae_fabs(d->ptr.p_double[i], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[i-1], _state)));
52743                 sminoa = ae_minreal(sminoa, mu, _state);
52744                 if( ae_fp_eq(sminoa,(double)(0)) )
52745                 {
52746                     break;
52747                 }
52748             }
52749         }
52750         sminoa = sminoa/ae_sqrt((double)(n), _state);
52751         thresh = ae_maxreal(tol*sminoa, maxitr*n*n*unfl, _state);
52752     }
52753     else
52754     {
52755 
52756         /*
52757          * Absolute accuracy desired
52758          */
52759         thresh = ae_maxreal(ae_fabs(tol, _state)*smax, maxitr*n*n*unfl, _state);
52760     }
52761 
52762     /*
52763      * Prepare for main iteration loop for the singular values
52764      * (MAXIT is the maximum number of passes through the inner
52765      * loop permitted before nonconvergence signalled.)
52766      */
52767     maxit = maxitr*n*n;
52768     iter = 0;
52769     oldll = -1;
52770     oldm = -1;
52771 
52772     /*
52773      * M points to last element of unconverged part of matrix
52774      */
52775     m = n;
52776 
52777     /*
52778      * Begin main iteration loop
52779      */
52780     for(;;)
52781     {
52782 
52783         /*
52784          * Check for convergence or exceeding iteration count
52785          */
52786         if( m<=1 )
52787         {
52788             break;
52789         }
52790         if( iter>maxit )
52791         {
52792             result = ae_false;
52793             ae_frame_leave(_state);
52794             return result;
52795         }
52796 
52797         /*
52798          * Find diagonal block of matrix to work on
52799          */
52800         if( ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(ae_fabs(d->ptr.p_double[m], _state),thresh) )
52801         {
52802             d->ptr.p_double[m] = (double)(0);
52803         }
52804         smax = ae_fabs(d->ptr.p_double[m], _state);
52805         smin = smax;
52806         matrixsplitflag = ae_false;
52807         for(lll=1; lll<=m-1; lll++)
52808         {
52809             ll = m-lll;
52810             abss = ae_fabs(d->ptr.p_double[ll], _state);
52811             abse = ae_fabs(e->ptr.p_double[ll], _state);
52812             if( ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(abss,thresh) )
52813             {
52814                 d->ptr.p_double[ll] = (double)(0);
52815             }
52816             if( ae_fp_less_eq(abse,thresh) )
52817             {
52818                 matrixsplitflag = ae_true;
52819                 break;
52820             }
52821             smin = ae_minreal(smin, abss, _state);
52822             smax = ae_maxreal(smax, ae_maxreal(abss, abse, _state), _state);
52823         }
52824         if( !matrixsplitflag )
52825         {
52826             ll = 0;
52827         }
52828         else
52829         {
52830 
52831             /*
52832              * Matrix splits since E(LL) = 0
52833              */
52834             e->ptr.p_double[ll] = (double)(0);
52835             if( ll==m-1 )
52836             {
52837 
52838                 /*
52839                  * Convergence of bottom singular value, return to top of loop
52840                  */
52841                 m = m-1;
52842                 continue;
52843             }
52844         }
52845         ll = ll+1;
52846 
52847         /*
52848          * E(LL) through E(M-1) are nonzero, E(LL-1) is zero
52849          */
52850         if( ll==m-1 )
52851         {
52852 
52853             /*
52854              * 2 by 2 block, handle separately
52855              */
52856             bdsvd_svdv2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl, _state);
52857             d->ptr.p_double[m-1] = sigmx;
52858             e->ptr.p_double[m-1] = (double)(0);
52859             d->ptr.p_double[m] = sigmn;
52860 
52861             /*
52862              * Compute singular vectors, if desired
52863              */
52864             if( ncvt>0 )
52865             {
52866                 mm0 = m+(vstart-1);
52867                 mm1 = m-1+(vstart-1);
52868                 ae_v_moved(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), cosr);
52869                 ae_v_addd(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), sinr);
52870                 ae_v_muld(&vt->ptr.pp_double[mm0][vstart], 1, ae_v_len(vstart,vend), cosr);
52871                 ae_v_subd(&vt->ptr.pp_double[mm0][vstart], 1, &vt->ptr.pp_double[mm1][vstart], 1, ae_v_len(vstart,vend), sinr);
52872                 ae_v_move(&vt->ptr.pp_double[mm1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend));
52873             }
52874             if( nru>0 )
52875             {
52876                 mm0 = m+ustart-1;
52877                 mm1 = m-1+ustart-1;
52878                 ae_v_moved(&utemp.ptr.p_double[ustart], 1, &ut.ptr.pp_double[mm1][ustart], 1, ae_v_len(ustart,uend), cosl);
52879                 ae_v_addd(&utemp.ptr.p_double[ustart], 1, &ut.ptr.pp_double[mm0][ustart], 1, ae_v_len(ustart,uend), sinl);
52880                 ae_v_muld(&ut.ptr.pp_double[mm0][ustart], 1, ae_v_len(ustart,uend), cosl);
52881                 ae_v_subd(&ut.ptr.pp_double[mm0][ustart], 1, &ut.ptr.pp_double[mm1][ustart], 1, ae_v_len(ustart,uend), sinl);
52882                 ae_v_move(&ut.ptr.pp_double[mm1][ustart], 1, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend));
52883             }
52884             if( ncc>0 )
52885             {
52886                 mm0 = m+cstart-1;
52887                 mm1 = m-1+cstart-1;
52888                 ae_v_moved(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), cosl);
52889                 ae_v_addd(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), sinl);
52890                 ae_v_muld(&c->ptr.pp_double[mm0][cstart], 1, ae_v_len(cstart,cend), cosl);
52891                 ae_v_subd(&c->ptr.pp_double[mm0][cstart], 1, &c->ptr.pp_double[mm1][cstart], 1, ae_v_len(cstart,cend), sinl);
52892                 ae_v_move(&c->ptr.pp_double[mm1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend));
52893             }
52894             m = m-2;
52895             continue;
52896         }
52897 
52898         /*
52899          * If working on new submatrix, choose shift direction
52900          * (from larger end diagonal element towards smaller)
52901          *
52902          * Previously was
52903          *     "if (LL>OLDM) or (M<OLDLL) then"
52904          * fixed thanks to Michael Rolle < m@rolle.name >
52905          * Very strange that LAPACK still contains it.
52906          */
52907         bchangedir = ae_false;
52908         if( idir==1&&ae_fp_less(ae_fabs(d->ptr.p_double[ll], _state),1.0E-3*ae_fabs(d->ptr.p_double[m], _state)) )
52909         {
52910             bchangedir = ae_true;
52911         }
52912         if( idir==2&&ae_fp_less(ae_fabs(d->ptr.p_double[m], _state),1.0E-3*ae_fabs(d->ptr.p_double[ll], _state)) )
52913         {
52914             bchangedir = ae_true;
52915         }
52916         if( (ll!=oldll||m!=oldm)||bchangedir )
52917         {
52918             if( ae_fp_greater_eq(ae_fabs(d->ptr.p_double[ll], _state),ae_fabs(d->ptr.p_double[m], _state)) )
52919             {
52920 
52921                 /*
52922                  * Chase bulge from top (big end) to bottom (small end)
52923                  */
52924                 idir = 1;
52925             }
52926             else
52927             {
52928 
52929                 /*
52930                  * Chase bulge from bottom (big end) to top (small end)
52931                  */
52932                 idir = 2;
52933             }
52934         }
52935 
52936         /*
52937          * Apply convergence tests
52938          */
52939         if( idir==1 )
52940         {
52941 
52942             /*
52943              * Run convergence test in forward direction
52944              * First apply standard test to bottom of matrix
52945              */
52946             if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[m], _state))||(ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh)) )
52947             {
52948                 e->ptr.p_double[m-1] = (double)(0);
52949                 continue;
52950             }
52951             if( ae_fp_greater_eq(tol,(double)(0)) )
52952             {
52953 
52954                 /*
52955                  * If relative accuracy desired,
52956                  * apply convergence criterion forward
52957                  */
52958                 mu = ae_fabs(d->ptr.p_double[ll], _state);
52959                 sminl = mu;
52960                 iterflag = ae_false;
52961                 for(lll=ll; lll<=m-1; lll++)
52962                 {
52963                     if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) )
52964                     {
52965                         e->ptr.p_double[lll] = (double)(0);
52966                         iterflag = ae_true;
52967                         break;
52968                     }
52969                     mu = ae_fabs(d->ptr.p_double[lll+1], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state)));
52970                     sminl = ae_minreal(sminl, mu, _state);
52971                 }
52972                 if( iterflag )
52973                 {
52974                     continue;
52975                 }
52976             }
52977         }
52978         else
52979         {
52980 
52981             /*
52982              * Run convergence test in backward direction
52983              * First apply standard test to top of matrix
52984              */
52985             if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),ae_fabs(tol, _state)*ae_fabs(d->ptr.p_double[ll], _state))||(ae_fp_less(tol,(double)(0))&&ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh)) )
52986             {
52987                 e->ptr.p_double[ll] = (double)(0);
52988                 continue;
52989             }
52990             if( ae_fp_greater_eq(tol,(double)(0)) )
52991             {
52992 
52993                 /*
52994                  * If relative accuracy desired,
52995                  * apply convergence criterion backward
52996                  */
52997                 mu = ae_fabs(d->ptr.p_double[m], _state);
52998                 sminl = mu;
52999                 iterflag = ae_false;
53000                 for(lll=m-1; lll>=ll; lll--)
53001                 {
53002                     if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[lll], _state),tol*mu) )
53003                     {
53004                         e->ptr.p_double[lll] = (double)(0);
53005                         iterflag = ae_true;
53006                         break;
53007                     }
53008                     mu = ae_fabs(d->ptr.p_double[lll], _state)*(mu/(mu+ae_fabs(e->ptr.p_double[lll], _state)));
53009                     sminl = ae_minreal(sminl, mu, _state);
53010                 }
53011                 if( iterflag )
53012                 {
53013                     continue;
53014                 }
53015             }
53016         }
53017         oldll = ll;
53018         oldm = m;
53019 
53020         /*
53021          * Compute shift.  First, test if shifting would ruin relative
53022          * accuracy, and if so set the shift to zero.
53023          */
53024         if( ae_fp_greater_eq(tol,(double)(0))&&ae_fp_less_eq(n*tol*(sminl/smax),ae_maxreal(eps, 0.01*tol, _state)) )
53025         {
53026 
53027             /*
53028              * Use a zero shift to avoid loss of relative accuracy
53029              */
53030             shift = (double)(0);
53031         }
53032         else
53033         {
53034 
53035             /*
53036              * Compute the shift from 2-by-2 block at end of matrix
53037              */
53038             if( idir==1 )
53039             {
53040                 sll = ae_fabs(d->ptr.p_double[ll], _state);
53041                 bdsvd_svd2x2(d->ptr.p_double[m-1], e->ptr.p_double[m-1], d->ptr.p_double[m], &shift, &r, _state);
53042             }
53043             else
53044             {
53045                 sll = ae_fabs(d->ptr.p_double[m], _state);
53046                 bdsvd_svd2x2(d->ptr.p_double[ll], e->ptr.p_double[ll], d->ptr.p_double[ll+1], &shift, &r, _state);
53047             }
53048 
53049             /*
53050              * Test if shift negligible, and if so set to zero
53051              */
53052             if( ae_fp_greater(sll,(double)(0)) )
53053             {
53054                 if( ae_fp_less(ae_sqr(shift/sll, _state),eps) )
53055                 {
53056                     shift = (double)(0);
53057                 }
53058             }
53059         }
53060 
53061         /*
53062          * Increment iteration count
53063          */
53064         iter = iter+m-ll;
53065 
53066         /*
53067          * If SHIFT = 0, do simplified QR iteration
53068          */
53069         if( ae_fp_eq(shift,(double)(0)) )
53070         {
53071             if( idir==1 )
53072             {
53073 
53074                 /*
53075                  * Chase bulge from top to bottom
53076                  * Save cosines and sines for later singular vector updates
53077                  */
53078                 cs = (double)(1);
53079                 oldcs = (double)(1);
53080                 for(i=ll; i<=m-1; i++)
53081                 {
53082                     generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i], &cs, &sn, &r, _state);
53083                     if( i>ll )
53084                     {
53085                         e->ptr.p_double[i-1] = oldsn*r;
53086                     }
53087                     generaterotation(oldcs*r, d->ptr.p_double[i+1]*sn, &oldcs, &oldsn, &tmp, _state);
53088                     d->ptr.p_double[i] = tmp;
53089                     work0.ptr.p_double[i-ll+1] = cs;
53090                     work1.ptr.p_double[i-ll+1] = sn;
53091                     work2.ptr.p_double[i-ll+1] = oldcs;
53092                     work3.ptr.p_double[i-ll+1] = oldsn;
53093                 }
53094                 h = d->ptr.p_double[m]*cs;
53095                 d->ptr.p_double[m] = h*oldcs;
53096                 e->ptr.p_double[m-1] = h*oldsn;
53097 
53098                 /*
53099                  * Update singular vectors
53100                  */
53101                 if( ncvt>0 )
53102                 {
53103                     applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state);
53104                 }
53105                 if( nru>0 )
53106                 {
53107                     applyrotationsfromtheleft(fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work2, &work3, &ut, &utemp, _state);
53108                 }
53109                 if( ncc>0 )
53110                 {
53111                     applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state);
53112                 }
53113 
53114                 /*
53115                  * Test convergence
53116                  */
53117                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) )
53118                 {
53119                     e->ptr.p_double[m-1] = (double)(0);
53120                 }
53121             }
53122             else
53123             {
53124 
53125                 /*
53126                  * Chase bulge from bottom to top
53127                  * Save cosines and sines for later singular vector updates
53128                  */
53129                 cs = (double)(1);
53130                 oldcs = (double)(1);
53131                 for(i=m; i>=ll+1; i--)
53132                 {
53133                     generaterotation(d->ptr.p_double[i]*cs, e->ptr.p_double[i-1], &cs, &sn, &r, _state);
53134                     if( i<m )
53135                     {
53136                         e->ptr.p_double[i] = oldsn*r;
53137                     }
53138                     generaterotation(oldcs*r, d->ptr.p_double[i-1]*sn, &oldcs, &oldsn, &tmp, _state);
53139                     d->ptr.p_double[i] = tmp;
53140                     work0.ptr.p_double[i-ll] = cs;
53141                     work1.ptr.p_double[i-ll] = -sn;
53142                     work2.ptr.p_double[i-ll] = oldcs;
53143                     work3.ptr.p_double[i-ll] = -oldsn;
53144                 }
53145                 h = d->ptr.p_double[ll]*cs;
53146                 d->ptr.p_double[ll] = h*oldcs;
53147                 e->ptr.p_double[ll] = h*oldsn;
53148 
53149                 /*
53150                  * Update singular vectors
53151                  */
53152                 if( ncvt>0 )
53153                 {
53154                     applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state);
53155                 }
53156                 if( nru>0 )
53157                 {
53158                     applyrotationsfromtheleft(!fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work0, &work1, &ut, &utemp, _state);
53159                 }
53160                 if( ncc>0 )
53161                 {
53162                     applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
53163                 }
53164 
53165                 /*
53166                  * Test convergence
53167                  */
53168                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) )
53169                 {
53170                     e->ptr.p_double[ll] = (double)(0);
53171                 }
53172             }
53173         }
53174         else
53175         {
53176 
53177             /*
53178              * Use nonzero shift
53179              */
53180             if( idir==1 )
53181             {
53182 
53183                 /*
53184                  * Chase bulge from top to bottom
53185                  * Save cosines and sines for later singular vector updates
53186                  */
53187                 f = (ae_fabs(d->ptr.p_double[ll], _state)-shift)*(bdsvd_extsignbdsqr((double)(1), d->ptr.p_double[ll], _state)+shift/d->ptr.p_double[ll]);
53188                 g = e->ptr.p_double[ll];
53189                 for(i=ll; i<=m-1; i++)
53190                 {
53191                     generaterotation(f, g, &cosr, &sinr, &r, _state);
53192                     if( i>ll )
53193                     {
53194                         e->ptr.p_double[i-1] = r;
53195                     }
53196                     f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i];
53197                     e->ptr.p_double[i] = cosr*e->ptr.p_double[i]-sinr*d->ptr.p_double[i];
53198                     g = sinr*d->ptr.p_double[i+1];
53199                     d->ptr.p_double[i+1] = cosr*d->ptr.p_double[i+1];
53200                     generaterotation(f, g, &cosl, &sinl, &r, _state);
53201                     d->ptr.p_double[i] = r;
53202                     f = cosl*e->ptr.p_double[i]+sinl*d->ptr.p_double[i+1];
53203                     d->ptr.p_double[i+1] = cosl*d->ptr.p_double[i+1]-sinl*e->ptr.p_double[i];
53204                     if( i<m-1 )
53205                     {
53206                         g = sinl*e->ptr.p_double[i+1];
53207                         e->ptr.p_double[i+1] = cosl*e->ptr.p_double[i+1];
53208                     }
53209                     work0.ptr.p_double[i-ll+1] = cosr;
53210                     work1.ptr.p_double[i-ll+1] = sinr;
53211                     work2.ptr.p_double[i-ll+1] = cosl;
53212                     work3.ptr.p_double[i-ll+1] = sinl;
53213                 }
53214                 e->ptr.p_double[m-1] = f;
53215 
53216                 /*
53217                  * Update singular vectors
53218                  */
53219                 if( ncvt>0 )
53220                 {
53221                     applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work0, &work1, vt, &vttemp, _state);
53222                 }
53223                 if( nru>0 )
53224                 {
53225                     applyrotationsfromtheleft(fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work2, &work3, &ut, &utemp, _state);
53226                 }
53227                 if( ncc>0 )
53228                 {
53229                     applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work2, &work3, c, &ctemp, _state);
53230                 }
53231 
53232                 /*
53233                  * Test convergence
53234                  */
53235                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[m-1], _state),thresh) )
53236                 {
53237                     e->ptr.p_double[m-1] = (double)(0);
53238                 }
53239             }
53240             else
53241             {
53242 
53243                 /*
53244                  * Chase bulge from bottom to top
53245                  * Save cosines and sines for later singular vector updates
53246                  */
53247                 f = (ae_fabs(d->ptr.p_double[m], _state)-shift)*(bdsvd_extsignbdsqr((double)(1), d->ptr.p_double[m], _state)+shift/d->ptr.p_double[m]);
53248                 g = e->ptr.p_double[m-1];
53249                 for(i=m; i>=ll+1; i--)
53250                 {
53251                     generaterotation(f, g, &cosr, &sinr, &r, _state);
53252                     if( i<m )
53253                     {
53254                         e->ptr.p_double[i] = r;
53255                     }
53256                     f = cosr*d->ptr.p_double[i]+sinr*e->ptr.p_double[i-1];
53257                     e->ptr.p_double[i-1] = cosr*e->ptr.p_double[i-1]-sinr*d->ptr.p_double[i];
53258                     g = sinr*d->ptr.p_double[i-1];
53259                     d->ptr.p_double[i-1] = cosr*d->ptr.p_double[i-1];
53260                     generaterotation(f, g, &cosl, &sinl, &r, _state);
53261                     d->ptr.p_double[i] = r;
53262                     f = cosl*e->ptr.p_double[i-1]+sinl*d->ptr.p_double[i-1];
53263                     d->ptr.p_double[i-1] = cosl*d->ptr.p_double[i-1]-sinl*e->ptr.p_double[i-1];
53264                     if( i>ll+1 )
53265                     {
53266                         g = sinl*e->ptr.p_double[i-2];
53267                         e->ptr.p_double[i-2] = cosl*e->ptr.p_double[i-2];
53268                     }
53269                     work0.ptr.p_double[i-ll] = cosr;
53270                     work1.ptr.p_double[i-ll] = -sinr;
53271                     work2.ptr.p_double[i-ll] = cosl;
53272                     work3.ptr.p_double[i-ll] = -sinl;
53273                 }
53274                 e->ptr.p_double[ll] = f;
53275 
53276                 /*
53277                  * Test convergence
53278                  */
53279                 if( ae_fp_less_eq(ae_fabs(e->ptr.p_double[ll], _state),thresh) )
53280                 {
53281                     e->ptr.p_double[ll] = (double)(0);
53282                 }
53283 
53284                 /*
53285                  * Update singular vectors if desired
53286                  */
53287                 if( ncvt>0 )
53288                 {
53289                     applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, &work2, &work3, vt, &vttemp, _state);
53290                 }
53291                 if( nru>0 )
53292                 {
53293                     applyrotationsfromtheleft(!fwddir, ll+ustart-1, m+ustart-1, ustart, uend, &work0, &work1, &ut, &utemp, _state);
53294                 }
53295                 if( ncc>0 )
53296                 {
53297                     applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, &work0, &work1, c, &ctemp, _state);
53298                 }
53299             }
53300         }
53301 
53302         /*
53303          * QR iteration finished, go back and check convergence
53304          */
53305         continue;
53306     }
53307 
53308     /*
53309      * All singular values converged, so make them positive
53310      */
53311     for(i=1; i<=n; i++)
53312     {
53313         if( ae_fp_less(d->ptr.p_double[i],(double)(0)) )
53314         {
53315             d->ptr.p_double[i] = -d->ptr.p_double[i];
53316 
53317             /*
53318              * Change sign of singular vectors, if desired
53319              */
53320             if( ncvt>0 )
53321             {
53322                 ae_v_muld(&vt->ptr.pp_double[i+vstart-1][vstart], 1, ae_v_len(vstart,vend), -1);
53323             }
53324         }
53325     }
53326 
53327     /*
53328      * Sort the singular values into decreasing order (insertion sort on
53329      * singular values, but only one transposition per singular vector)
53330      */
53331     for(i=1; i<=n-1; i++)
53332     {
53333 
53334         /*
53335          * Scan for smallest D(I)
53336          */
53337         isub = 1;
53338         smin = d->ptr.p_double[1];
53339         for(j=2; j<=n+1-i; j++)
53340         {
53341             if( ae_fp_less_eq(d->ptr.p_double[j],smin) )
53342             {
53343                 isub = j;
53344                 smin = d->ptr.p_double[j];
53345             }
53346         }
53347         if( isub!=n+1-i )
53348         {
53349 
53350             /*
53351              * Swap singular values and vectors
53352              */
53353             d->ptr.p_double[isub] = d->ptr.p_double[n+1-i];
53354             d->ptr.p_double[n+1-i] = smin;
53355             if( ncvt>0 )
53356             {
53357                 j = n+1-i;
53358                 ae_v_move(&vttemp.ptr.p_double[vstart], 1, &vt->ptr.pp_double[isub+vstart-1][vstart], 1, ae_v_len(vstart,vend));
53359                 ae_v_move(&vt->ptr.pp_double[isub+vstart-1][vstart], 1, &vt->ptr.pp_double[j+vstart-1][vstart], 1, ae_v_len(vstart,vend));
53360                 ae_v_move(&vt->ptr.pp_double[j+vstart-1][vstart], 1, &vttemp.ptr.p_double[vstart], 1, ae_v_len(vstart,vend));
53361             }
53362             if( nru>0 )
53363             {
53364                 j = n+1-i;
53365                 ae_v_move(&utemp.ptr.p_double[ustart], 1, &ut.ptr.pp_double[isub+ustart-1][ustart], 1, ae_v_len(ustart,uend));
53366                 ae_v_move(&ut.ptr.pp_double[isub+ustart-1][ustart], 1, &ut.ptr.pp_double[j+ustart-1][ustart], 1, ae_v_len(ustart,uend));
53367                 ae_v_move(&ut.ptr.pp_double[j+ustart-1][ustart], 1, &utemp.ptr.p_double[ustart], 1, ae_v_len(ustart,uend));
53368             }
53369             if( ncc>0 )
53370             {
53371                 j = n+1-i;
53372                 ae_v_move(&ctemp.ptr.p_double[cstart], 1, &c->ptr.pp_double[isub+cstart-1][cstart], 1, ae_v_len(cstart,cend));
53373                 ae_v_move(&c->ptr.pp_double[isub+cstart-1][cstart], 1, &c->ptr.pp_double[j+cstart-1][cstart], 1, ae_v_len(cstart,cend));
53374                 ae_v_move(&c->ptr.pp_double[j+cstart-1][cstart], 1, &ctemp.ptr.p_double[cstart], 1, ae_v_len(cstart,cend));
53375             }
53376         }
53377     }
53378 
53379     /*
53380      * Copy U back from temporary storage
53381      */
53382     if( nru>0 )
53383     {
53384         rmatrixtranspose(n, nru, &ut, ustart, ustart, uu, ustart, ustart, _state);
53385     }
53386     ae_frame_leave(_state);
53387     return result;
53388 }
53389 
53390 
bdsvd_extsignbdsqr(double a,double b,ae_state * _state)53391 static double bdsvd_extsignbdsqr(double a, double b, ae_state *_state)
53392 {
53393     double result;
53394 
53395 
53396     if( ae_fp_greater_eq(b,(double)(0)) )
53397     {
53398         result = ae_fabs(a, _state);
53399     }
53400     else
53401     {
53402         result = -ae_fabs(a, _state);
53403     }
53404     return result;
53405 }
53406 
53407 
bdsvd_svd2x2(double f,double g,double h,double * ssmin,double * ssmax,ae_state * _state)53408 static void bdsvd_svd2x2(double f,
53409      double g,
53410      double h,
53411      double* ssmin,
53412      double* ssmax,
53413      ae_state *_state)
53414 {
53415     double aas;
53416     double at;
53417     double au;
53418     double c;
53419     double fa;
53420     double fhmn;
53421     double fhmx;
53422     double ga;
53423     double ha;
53424 
53425     *ssmin = 0;
53426     *ssmax = 0;
53427 
53428     fa = ae_fabs(f, _state);
53429     ga = ae_fabs(g, _state);
53430     ha = ae_fabs(h, _state);
53431     fhmn = ae_minreal(fa, ha, _state);
53432     fhmx = ae_maxreal(fa, ha, _state);
53433     if( ae_fp_eq(fhmn,(double)(0)) )
53434     {
53435         *ssmin = (double)(0);
53436         if( ae_fp_eq(fhmx,(double)(0)) )
53437         {
53438             *ssmax = ga;
53439         }
53440         else
53441         {
53442             *ssmax = ae_maxreal(fhmx, ga, _state)*ae_sqrt(1+ae_sqr(ae_minreal(fhmx, ga, _state)/ae_maxreal(fhmx, ga, _state), _state), _state);
53443         }
53444     }
53445     else
53446     {
53447         if( ae_fp_less(ga,fhmx) )
53448         {
53449             aas = 1+fhmn/fhmx;
53450             at = (fhmx-fhmn)/fhmx;
53451             au = ae_sqr(ga/fhmx, _state);
53452             c = 2/(ae_sqrt(aas*aas+au, _state)+ae_sqrt(at*at+au, _state));
53453             *ssmin = fhmn*c;
53454             *ssmax = fhmx/c;
53455         }
53456         else
53457         {
53458             au = fhmx/ga;
53459             if( ae_fp_eq(au,(double)(0)) )
53460             {
53461 
53462                 /*
53463                  * Avoid possible harmful underflow if exponent range
53464                  * asymmetric (true SSMIN may not underflow even if
53465                  * AU underflows)
53466                  */
53467                 *ssmin = fhmn*fhmx/ga;
53468                 *ssmax = ga;
53469             }
53470             else
53471             {
53472                 aas = 1+fhmn/fhmx;
53473                 at = (fhmx-fhmn)/fhmx;
53474                 c = 1/(ae_sqrt(1+ae_sqr(aas*au, _state), _state)+ae_sqrt(1+ae_sqr(at*au, _state), _state));
53475                 *ssmin = fhmn*c*au;
53476                 *ssmin = *ssmin+(*ssmin);
53477                 *ssmax = ga/(c+c);
53478             }
53479         }
53480     }
53481 }
53482 
53483 
/*************************************************************************
Full SVD of the 2x2 upper triangular matrix [ F G; 0 H ] with singular
vectors (analog of LAPACK's DLASV2, which this code closely follows).
On exit:
    SSMax, SSMin  -  larger/smaller singular value; signs are chosen so
                     that the rotation identity below holds exactly
                     (SSMax and/or SSMin may come out negative).
    (CSL, SNL)    -  left rotation,  (CSR, SNR) - right rotation, with
                     [ CSL SNL] [F G] [CSR -SNR] = [SSMax 0; 0 SSMin]
                     [-SNL CSL] [0 H] [SNR  CSR]
                     (standard DLASV2 contract; NOTE(review): confirm
                     sign conventions against callers if they matter).
*************************************************************************/
static void bdsvd_svdv2x2(double f,
     double g,
     double h,
     double* ssmin,
     double* ssmax,
     double* snr,
     double* csr,
     double* snl,
     double* csl,
     ae_state *_state)
{
    ae_bool gasmal;
    ae_bool swp;
    ae_int_t pmax;
    double a;
    double clt;
    double crt;
    double d;
    double fa;
    double ft;
    double ga;
    double gt;
    double ha;
    double ht;
    double l;
    double m;
    double mm;
    double r;
    double s;
    double slt;
    double srt;
    double t;
    double temp;
    double tsign;
    double tt;
    double v;

    *ssmin = 0;
    *ssmax = 0;
    *snr = 0;
    *csr = 0;
    *snl = 0;
    *csl = 0;

    /*
     * Working (signed) copies FT/HT and their magnitudes FA/HA;
     * FT/HT may be swapped below so F/H themselves stay untouched
     * for the final sign correction.
     */
    ft = f;
    fa = ae_fabs(ft, _state);
    ht = h;
    ha = ae_fabs(h, _state);

    /*
     * these initializers are not really necessary,
     * but without them compiler complains about uninitialized locals
     */
    clt = (double)(0);
    crt = (double)(0);
    slt = (double)(0);
    srt = (double)(0);
    tsign = (double)(0);

    /*
     * PMAX points to the maximum absolute element of matrix
     *  PMAX = 1 if F largest in absolute values
     *  PMAX = 2 if G largest in absolute values
     *  PMAX = 3 if H largest in absolute values
     */
    pmax = 1;
    swp = ae_fp_greater(ha,fa);
    if( swp )
    {

        /*
         * Now FA .ge. HA
         */
        pmax = 3;
        temp = ft;
        ft = ht;
        ht = temp;
        temp = fa;
        fa = ha;
        ha = temp;
    }
    gt = g;
    ga = ae_fabs(gt, _state);
    if( ae_fp_eq(ga,(double)(0)) )
    {

        /*
         * Diagonal matrix
         */
        *ssmin = ha;
        *ssmax = fa;
        clt = (double)(1);
        crt = (double)(1);
        slt = (double)(0);
        srt = (double)(0);
    }
    else
    {
        gasmal = ae_true;
        if( ae_fp_greater(ga,fa) )
        {
            pmax = 2;
            if( ae_fp_less(fa/ga,ae_machineepsilon) )
            {

                /*
                 * Case of very large GA: the matrix is dominated by its
                 * off-diagonal; singular values and rotations are formed
                 * directly to avoid overflow in the normal-case formulas.
                 */
                gasmal = ae_false;
                *ssmax = ga;
                if( ae_fp_greater(ha,(double)(1)) )
                {
                    v = ga/ha;
                    *ssmin = fa/v;
                }
                else
                {
                    v = fa/ga;
                    *ssmin = v*ha;
                }
                clt = (double)(1);
                slt = ht/gt;
                srt = (double)(1);
                crt = ft/gt;
            }
        }
        if( gasmal )
        {

            /*
             * Normal case: compute L = (FA-HA)/FA guarding against the
             * case where the subtraction cancels to FA exactly.
             */
            d = fa-ha;
            if( ae_fp_eq(d,fa) )
            {

                /*
                 * Copes with infinite F or H (D==FA means HA is negligible)
                 */
                l = (double)(1);
            }
            else
            {
                l = d/fa;
            }
            m = gt/ft;
            t = 2-l;
            mm = m*m;
            tt = t*t;
            s = ae_sqrt(tt+mm, _state);
            if( ae_fp_eq(l,(double)(0)) )
            {
                r = ae_fabs(m, _state);
            }
            else
            {
                r = ae_sqrt(l*l+mm, _state);
            }
            a = 0.5*(s+r);
            *ssmin = ha/a;
            *ssmax = fa*a;
            if( ae_fp_eq(mm,(double)(0)) )
            {

                /*
                 * Note that M is very tiny
                 */
                if( ae_fp_eq(l,(double)(0)) )
                {
                    t = bdsvd_extsignbdsqr((double)(2), ft, _state)*bdsvd_extsignbdsqr((double)(1), gt, _state);
                }
                else
                {

                    /*
                     * T on the right-hand side still holds 2-L from above
                     */
                    t = gt/bdsvd_extsignbdsqr(d, ft, _state)+m/t;
                }
            }
            else
            {
                t = (m/(s+t)+m/(r+l))*(1+a);
            }
            l = ae_sqrt(t*t+4, _state);
            crt = 2/l;
            srt = t/l;
            clt = (crt+srt*m)/a;
            v = ht/ft;
            slt = v*srt/a;
        }
    }

    /*
     * Undo the initial swap: if FT/HT were exchanged, the left and right
     * rotations (and cos/sin roles) exchange as well.
     */
    if( swp )
    {
        *csl = srt;
        *snl = crt;
        *csr = slt;
        *snr = clt;
    }
    else
    {
        *csl = clt;
        *snl = slt;
        *csr = crt;
        *snr = srt;
    }

    /*
     * Correct signs of SSMAX and SSMIN so that the rotation identity
     * holds with the original (signed) F, G, H
     */
    if( pmax==1 )
    {
        tsign = bdsvd_extsignbdsqr((double)(1), *csr, _state)*bdsvd_extsignbdsqr((double)(1), *csl, _state)*bdsvd_extsignbdsqr((double)(1), f, _state);
    }
    if( pmax==2 )
    {
        tsign = bdsvd_extsignbdsqr((double)(1), *snr, _state)*bdsvd_extsignbdsqr((double)(1), *csl, _state)*bdsvd_extsignbdsqr((double)(1), g, _state);
    }
    if( pmax==3 )
    {
        tsign = bdsvd_extsignbdsqr((double)(1), *snr, _state)*bdsvd_extsignbdsqr((double)(1), *snl, _state)*bdsvd_extsignbdsqr((double)(1), h, _state);
    }
    *ssmax = bdsvd_extsignbdsqr(*ssmax, tsign, _state);
    *ssmin = bdsvd_extsignbdsqr(*ssmin, tsign*bdsvd_extsignbdsqr((double)(1), f, _state)*bdsvd_extsignbdsqr((double)(1), h, _state), _state);
}
53701 
53702 
53703 #endif
53704 #if defined(AE_COMPILE_SVD) || !defined(AE_PARTIAL_BUILD)
53705 
53706 
53707 /*************************************************************************
53708 Singular value decomposition of a rectangular matrix.
53709 
53710   ! COMMERCIAL EDITION OF ALGLIB:
53711   !
53712   ! Commercial Edition of ALGLIB includes following important improvements
53713   ! of this function:
53714   ! * high-performance native backend with same C# interface (C# version)
53715   ! * hardware vendor (Intel) implementations of linear algebra primitives
53716   !   (C++ and C# versions, x86/x64 platform)
53717   !
53718   ! We recommend you to read 'Working with commercial version' section  of
53719   ! ALGLIB Reference Manual in order to find out how to  use  performance-
53720   ! related features provided by commercial edition of ALGLIB.
53721 
53722 The algorithm calculates the singular value decomposition of a matrix of
53723 size MxN: A = U * S * V^T
53724 
53725 The algorithm finds the singular values and, optionally, matrices U and V^T.
53726 The algorithm can find both first min(M,N) columns of matrix U and rows of
53727 matrix V^T (singular vectors), and matrices U and V^T wholly (of sizes MxM
53728 and NxN respectively).
53729 
53730 Take into account that the subroutine does not return matrix V but V^T.
53731 
53732 Input parameters:
53733     A           -   matrix to be decomposed.
53734                     Array whose indexes range within [0..M-1, 0..N-1].
53735     M           -   number of rows in matrix A.
53736     N           -   number of columns in matrix A.
53737     UNeeded     -   0, 1 or 2. See the description of the parameter U.
53738     VTNeeded    -   0, 1 or 2. See the description of the parameter VT.
53739     AdditionalMemory -
53740                     If the parameter:
53741                      * equals 0, the algorithm doesn't use additional
53742                        memory (lower requirements, lower performance).
53743                      * equals 1, the algorithm uses additional
53744                        memory of size min(M,N)*min(M,N) of real numbers.
53745                        It often speeds up the algorithm.
53746                      * equals 2, the algorithm uses additional
53747                        memory of size M*min(M,N) of real numbers.
                       It allows you to achieve maximum performance.
53749                     The recommended value of the parameter is 2.
53750 
53751 Output parameters:
53752     W           -   contains singular values in descending order.
53753     U           -   if UNeeded=0, U isn't changed, the left singular vectors
53754                     are not calculated.
                    if UNeeded=1, U contains left singular vectors (first
53756                     min(M,N) columns of matrix U). Array whose indexes range
53757                     within [0..M-1, 0..Min(M,N)-1].
53758                     if UNeeded=2, U contains matrix U wholly. Array whose
53759                     indexes range within [0..M-1, 0..M-1].
53760     VT          -   if VTNeeded=0, VT isn't changed, the right singular vectors
53761                     are not calculated.
53762                     if VTNeeded=1, VT contains right singular vectors (first
53763                     min(M,N) rows of matrix V^T). Array whose indexes range
53764                     within [0..min(M,N)-1, 0..N-1].
53765                     if VTNeeded=2, VT contains matrix V^T wholly. Array whose
53766                     indexes range within [0..N-1, 0..N-1].
53767 
53768   -- ALGLIB --
53769      Copyright 2005 by Bochkanov Sergey
53770 *************************************************************************/
/*************************************************************************
Singular value decomposition of a rectangular M x N real matrix.

The matrix is first reduced to bidiagonal form (preceded by a QR or LQ
factorization when it is strongly rectangular, i.e. one dimension exceeds
1.6x the other), after which the bidiagonal SVD solver is invoked.

Input parameters:
    A       -   matrix to be decomposed (a local copy is made; the caller's
                matrix is not modified).
    M, N    -   number of rows / columns of A.
    UNeeded -   0, left singular vectors are not computed;
                1, U receives the first min(M,N) columns (M x min(M,N));
                2, U receives the full M x M matrix.
    VTNeeded-   0, right singular vectors are not computed;
                1, VT receives the first min(M,N) rows (min(M,N) x N);
                2, VT receives the full N x N matrix.
    AdditionalMemory -
                0..2; larger values allow the routine to allocate an
                intermediate matrix T2 (and work arrays) to speed up the
                accumulation of singular vectors.

Output parameters:
    W       -   singular values (vector of length min(M,N)+1).
    U, VT   -   singular vectors, as requested above.

Result:
    return value of the bidiagonal SVD solver (ae_true on convergence);
    ae_true is returned immediately for an empty matrix.
*************************************************************************/
ae_bool rmatrixsvd(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_int_t uneeded,
     ae_int_t vtneeded,
     ae_int_t additionalmemory,
     /* Real    */ ae_vector* w,
     /* Real    */ ae_matrix* u,
     /* Real    */ ae_matrix* vt,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_vector tauq;      /* Householder scalars from bidiagonalization (Q side) */
    ae_vector taup;      /* Householder scalars from bidiagonalization (P side) */
    ae_vector tau;       /* Householder scalars from QR/LQ pre-factorization */
    ae_vector e;         /* off-diagonal of the bidiagonal matrix */
    ae_vector work;      /* scratch for in-place transposition */
    ae_matrix t2;        /* intermediate matrix used when extra memory is allowed */
    ae_bool isupper;
    ae_int_t minmn;
    ae_int_t ncu;
    ae_int_t nrvt;
    ae_int_t nru;
    ae_int_t ncvt;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&tauq, 0, sizeof(tauq));
    memset(&taup, 0, sizeof(taup));
    memset(&tau, 0, sizeof(tau));
    memset(&e, 0, sizeof(e));
    memset(&work, 0, sizeof(work));
    memset(&t2, 0, sizeof(t2));
    /* work on a private copy so the caller's A is preserved */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(w);
    ae_matrix_clear(u);
    ae_matrix_clear(vt);
    ae_vector_init(&tauq, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&taup, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&e, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&t2, 0, 0, DT_REAL, _state, ae_true);

    result = ae_true;
    if( m==0||n==0 )
    {
        /* empty matrix: nothing to decompose */
        ae_frame_leave(_state);
        return result;
    }
    ae_assert(uneeded>=0&&uneeded<=2, "SVDDecomposition: wrong parameters!", _state);
    ae_assert(vtneeded>=0&&vtneeded<=2, "SVDDecomposition: wrong parameters!", _state);
    ae_assert(additionalmemory>=0&&additionalmemory<=2, "SVDDecomposition: wrong parameters!", _state);

    /*
     * initialize: allocate W and, depending on UNeeded/VTNeeded,
     * size U (NRU x NCU) and VT (NRVT x NCVT)
     */
    minmn = ae_minint(m, n, _state);
    ae_vector_set_length(w, minmn+1, _state);
    ncu = 0;
    nru = 0;
    if( uneeded==1 )
    {
        nru = m;
        ncu = minmn;
        ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state);
    }
    if( uneeded==2 )
    {
        nru = m;
        ncu = m;
        ae_matrix_set_length(u, nru-1+1, ncu-1+1, _state);
    }
    nrvt = 0;
    ncvt = 0;
    if( vtneeded==1 )
    {
        nrvt = minmn;
        ncvt = n;
        ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state);
    }
    if( vtneeded==2 )
    {
        nrvt = n;
        ncvt = n;
        ae_matrix_set_length(vt, nrvt-1+1, ncvt-1+1, _state);
    }

    /*
     * M much larger than N
     * Use bidiagonal reduction with QR-decomposition
     */
    if( ae_fp_greater((double)(m),1.6*n) )
    {
        if( uneeded==0 )
        {

            /*
             * No left singular vectors to be computed:
             * factor A=Q*R, zero out R's strict lower triangle,
             * and run the SVD on the N x N factor only.
             */
            rmatrixqr(a, m, n, &tau, _state);
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, n, n, &tauq, &taup, _state);
            rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state);
            rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state);
            result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, a, 0, vt, ncvt, _state);
            ae_frame_leave(_state);
            return result;
        }
        else
        {

            /*
             * Left singular vectors (may be full matrix U) to be computed
             */
            rmatrixqr(a, m, n, &tau, _state);
            rmatrixqrunpackq(a, m, n, &tau, ncu, u, _state);
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, n, n, &tauq, &taup, _state);
            rmatrixbdunpackpt(a, n, n, &taup, nrvt, vt, _state);
            rmatrixbdunpackdiagonals(a, n, n, &isupper, w, &e, _state);
            if( additionalmemory<1 )
            {

                /*
                 * No additional memory can be used:
                 * apply the bidiagonalization's Q directly to U.
                 */
                rmatrixbdmultiplybyq(a, n, n, &tauq, u, m, n, ae_true, ae_false, _state);
                result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, m, a, 0, vt, ncvt, _state);
            }
            else
            {

                /*
                 * Large U. Transforming intermediate matrix T2:
                 * accumulate rotations into the small N x N matrix T2,
                 * then form U with a single GEMM.
                 */
                ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
                rmatrixbdunpackq(a, n, n, &tauq, n, &t2, _state);
                copymatrix(u, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state);
                inplacetranspose(&t2, 0, n-1, 0, n-1, &work, _state);
                result = rmatrixbdsvd(w, &e, n, isupper, ae_false, u, 0, &t2, n, vt, ncvt, _state);
                rmatrixgemm(m, n, n, 1.0, a, 0, 0, 0, &t2, 0, 0, 1, 0.0, u, 0, 0, _state);
            }
            ae_frame_leave(_state);
            return result;
        }
    }

    /*
     * N much larger than M
     * Use bidiagonal reduction with LQ-decomposition
     */
    if( ae_fp_greater((double)(n),1.6*m) )
    {
        if( vtneeded==0 )
        {

            /*
             * No right singular vectors to be computed:
             * factor A=L*Q, zero out L's strict upper triangle,
             * and run the SVD on the M x M factor only.
             */
            rmatrixlq(a, m, n, &tau, _state);
            for(i=0; i<=m-1; i++)
            {
                for(j=i+1; j<=m-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, m, m, &tauq, &taup, _state);
            rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state);
            rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state);
            ae_vector_set_length(&work, m+1, _state);
            /* U is transposed around the bidiagonal SVD call so that the
               solver applies row (not column) operations to it */
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, 0, _state);
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            ae_frame_leave(_state);
            return result;
        }
        else
        {

            /*
             * Right singular vectors (may be full matrix VT) to be computed
             */
            rmatrixlq(a, m, n, &tau, _state);
            rmatrixlqunpackq(a, m, n, &tau, nrvt, vt, _state);
            for(i=0; i<=m-1; i++)
            {
                for(j=i+1; j<=m-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
            rmatrixbd(a, m, m, &tauq, &taup, _state);
            rmatrixbdunpackq(a, m, m, &tauq, ncu, u, _state);
            rmatrixbdunpackdiagonals(a, m, m, &isupper, w, &e, _state);
            ae_vector_set_length(&work, ae_maxint(m, n, _state)+1, _state);
            /* keep U transposed while the bidiagonal SVD runs (row ops) */
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            if( additionalmemory<1 )
            {

                /*
                 * No additional memory available:
                 * apply the bidiagonalization's P^T directly to VT.
                 */
                rmatrixbdmultiplybyp(a, m, m, &taup, vt, m, n, ae_false, ae_true, _state);
                result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, vt, n, _state);
            }
            else
            {

                /*
                 * Large VT. Transforming intermediate matrix T2:
                 * accumulate rotations into the small M x M matrix T2,
                 * then form VT with a single GEMM.
                 */
                rmatrixbdunpackpt(a, m, m, &taup, m, &t2, _state);
                result = rmatrixbdsvd(w, &e, m, isupper, ae_false, a, 0, u, nru, &t2, m, _state);
                copymatrix(vt, 0, m-1, 0, n-1, a, 0, m-1, 0, n-1, _state);
                rmatrixgemm(m, n, m, 1.0, &t2, 0, 0, 0, a, 0, 0, 0, 0.0, vt, 0, 0, _state);
            }
            /* restore U to its normal orientation */
            inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
            ae_frame_leave(_state);
            return result;
        }
    }

    /*
     * M<=N
     * We can use inplace transposition of U to get rid of columnwise operations
     */
    if( m<=n )
    {
        rmatrixbd(a, m, n, &tauq, &taup, _state);
        rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state);
        rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state);
        rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state);
        ae_vector_set_length(&work, m+1, _state);
        inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, a, 0, u, nru, vt, ncvt, _state);
        inplacetranspose(u, 0, nru-1, 0, ncu-1, &work, _state);
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Simple bidiagonal reduction (moderately rectangular, M>N case)
     */
    rmatrixbd(a, m, n, &tauq, &taup, _state);
    rmatrixbdunpackq(a, m, n, &tauq, ncu, u, _state);
    rmatrixbdunpackpt(a, m, n, &taup, nrvt, vt, _state);
    rmatrixbdunpackdiagonals(a, m, n, &isupper, w, &e, _state);
    if( additionalmemory<2||uneeded==0 )
    {

        /*
         * We cant use additional memory or there is no need in such operations
         */
        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, nru, a, 0, vt, ncvt, _state);
    }
    else
    {

        /*
         * We can use additional memory: work on a transposed copy of U
         * (T2) so the solver uses row operations, then transpose back.
         */
        ae_matrix_set_length(&t2, minmn-1+1, m-1+1, _state);
        copyandtranspose(u, 0, m-1, 0, minmn-1, &t2, 0, minmn-1, 0, m-1, _state);
        result = rmatrixbdsvd(w, &e, minmn, isupper, ae_false, u, 0, &t2, m, vt, ncvt, _state);
        copyandtranspose(&t2, 0, minmn-1, 0, m-1, u, 0, m-1, 0, minmn-1, _state);
    }
    ae_frame_leave(_state);
    return result;
}
54059 
54060 
54061 #endif
54062 #if defined(AE_COMPILE_RCOND) || !defined(AE_PARTIAL_BUILD)
54063 
54064 
54065 /*************************************************************************
54066 Estimate of a matrix condition number (1-norm)
54067 
54068 The algorithm calculates a lower bound of the condition number. In this case,
54069 the algorithm does not return a lower bound of the condition number, but an
54070 inverse number (to avoid an overflow in case of a singular matrix).
54071 
54072 Input parameters:
54073     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54074     N   -   size of matrix A.
54075 
54076 Result: 1/LowerBound(cond(A))
54077 
54078 NOTE:
54079     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54080     0.0 is returned in such cases.
54081 *************************************************************************/
double rmatrixrcond1(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "RMatrixRCond1: N<1!", _state);

    /*
     * ||A||_1 = max over columns j of sum_i |A[i][j]|.
     * T accumulates the per-column absolute sums.
     */
    ae_vector_set_length(&t, n, _state);
    for(j=0; j<=n-1; j++)
    {
        t.ptr.p_double[j] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
    }
    nrm = (double)(0);
    for(j=0; j<=n-1; j++)
    {
        if( ae_fp_greater(t.ptr.p_double[j],nrm) )
        {
            nrm = t.ptr.p_double[j];
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (1-norm flavor).
     */
    rmatrixlu(a, n, n, &pivots, _state);
    rcond_rmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54129 
54130 
54131 /*************************************************************************
54132 Estimate of a matrix condition number (infinity-norm).
54133 
54134 The algorithm calculates a lower bound of the condition number. In this case,
54135 the algorithm does not return a lower bound of the condition number, but an
54136 inverse number (to avoid an overflow in case of a singular matrix).
54137 
54138 Input parameters:
54139     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54140     N   -   size of matrix A.
54141 
54142 Result: 1/LowerBound(cond(A))
54143 
54144 NOTE:
54145     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54146     0.0 is returned in such cases.
54147 *************************************************************************/
double rmatrixrcondinf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);

    ae_assert(n>=1, "RMatrixRCondInf: N<1!", _state);

    /*
     * ||A||_inf = max over rows i of sum_j |A[i][j]|.
     * Row sums need no scratch vector - accumulate and fold in one pass.
     */
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = (double)(0);
        for(j=0; j<=n-1; j++)
        {
            v = v+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
        if( ae_fp_greater(v,nrm) )
        {
            nrm = v;
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (inf-norm flavor).
     */
    rmatrixlu(a, n, n, &pivots, _state);
    rcond_rmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54185 
54186 
54187 /*************************************************************************
54188 Condition number estimate of a symmetric positive definite matrix.
54189 
54190 The algorithm calculates a lower bound of the condition number. In this case,
54191 the algorithm does not return a lower bound of the condition number, but an
54192 inverse number (to avoid an overflow in case of a singular matrix).
54193 
54194 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
54195 matrices are equal, so the algorithm doesn't take into account the
54196 differences between these types of norms.
54197 
54198 Input parameters:
54199     A       -   symmetric positive definite matrix which is given by its
54200                 upper or lower triangle depending on the value of
54201                 IsUpper. Array with elements [0..N-1, 0..N-1].
54202     N       -   size of matrix A.
54203     IsUpper -   storage format.
54204 
54205 Result:
54206     1/LowerBound(cond(A)), if matrix A is positive definite,
54207    -1, if matrix A is not positive definite, and its condition number
54208     could not be found by this algorithm.
54209 
54210 NOTE:
54211     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54212     0.0 is returned in such cases.
54213 *************************************************************************/
double spdmatrixrcond(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double v;
    double nrm;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: Cholesky factorization destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    /*
     * Accumulate per-column absolute sums of the (implicitly symmetric)
     * matrix while visiting only the stored triangle: each off-diagonal
     * element A[i][j] contributes to both column i and column j.
     */
    ae_vector_set_length(&t, n, _state);
    for(i=0; i<=n-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i;
        }
        for(j=j1; j<=j2; j++)
        {
            v = ae_fabs(a->ptr.pp_double[i][j], _state);
            t.ptr.p_double[i] = t.ptr.p_double[i]+v;
            if( i!=j )
            {
                t.ptr.p_double[j] = t.ptr.p_double[j]+v;
            }
        }
    }
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
    }

    /*
     * Cholesky-factorize the copy; failure means the matrix is not
     * positive definite, in which case -1 is returned.
     */
    if( spdmatrixcholesky(a, n, isupper, _state) )
    {
        rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state);
        result = v;
    }
    else
    {
        result = (double)(-1);
    }
    ae_frame_leave(_state);
    return result;
}
54284 
54285 
54286 /*************************************************************************
54287 Triangular matrix: estimate of a condition number (1-norm)
54288 
54289 The algorithm calculates a lower bound of the condition number. In this case,
54290 the algorithm does not return a lower bound of the condition number, but an
54291 inverse number (to avoid an overflow in case of a singular matrix).
54292 
54293 Input parameters:
54294     A       -   matrix. Array[0..N-1, 0..N-1].
54295     N       -   size of A.
54296     IsUpper -   True, if the matrix is upper triangular.
54297     IsUnit  -   True, if the matrix has a unit diagonal.
54298 
54299 Result: 1/LowerBound(cond(A))
54300 
54301 NOTE:
54302     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54303     0.0 is returned in such cases.
54304 *************************************************************************/
double rmatrixtrrcond1(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector t;
    ae_int_t j1;
    ae_int_t j2;
    double result;

    /*
     * NOTE: the original version also allocated and registered an integer
     * "pivots" vector which was never used (no LU is performed here);
     * that dead allocation has been removed.
     */
    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state);

    /*
     * Compute ||A||_1 = max over columns of the sum of absolute values,
     * accounting only for the stored triangle; the diagonal contributes
     * 1 per column when IsUnit is set, |A[i][i]| otherwise.
     */
    ae_vector_set_length(&t, n, _state);
    for(i=0; i<=n-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        for(j=j1; j<=j2; j++)
        {
            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
        if( isunit )
        {
            t.ptr.p_double[i] = t.ptr.p_double[i]+1;
        }
        else
        {
            t.ptr.p_double[i] = t.ptr.p_double[i]+ae_fabs(a->ptr.pp_double[i][i], _state);
        }
    }
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
    }

    /*
     * Triangular estimator works directly on A - no factorization needed.
     */
    rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54369 
54370 
54371 /*************************************************************************
54372 Triangular matrix: estimate of a matrix condition number (infinity-norm).
54373 
54374 The algorithm calculates a lower bound of the condition number. In this case,
54375 the algorithm does not return a lower bound of the condition number, but an
54376 inverse number (to avoid an overflow in case of a singular matrix).
54377 
54378 Input parameters:
54379     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54380     N   -   size of matrix A.
54381     IsUpper -   True, if the matrix is upper triangular.
54382     IsUnit  -   True, if the matrix has a unit diagonal.
54383 
54384 Result: 1/LowerBound(cond(A))
54385 
54386 NOTE:
54387     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54388     0.0 is returned in such cases.
54389 *************************************************************************/
double rmatrixtrrcondinf(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_int_t j1;
    ae_int_t j2;
    double result;

    /*
     * NOTE: the original version created a dynamic frame solely to hold an
     * integer "pivots" vector which was never used (no LU is performed
     * here); both the dead allocation and the now-empty frame have been
     * removed, so this routine allocates nothing.
     */
    ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state);

    /*
     * Compute ||A||_inf = max over rows of the sum of absolute values,
     * accounting only for the stored triangle; the diagonal contributes
     * 1 per row when IsUnit is set, |A[i][i]| otherwise.
     */
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        v = (double)(0);
        for(j=j1; j<=j2; j++)
        {
            v = v+ae_fabs(a->ptr.pp_double[i][j], _state);
        }
        if( isunit )
        {
            v = v+1;
        }
        else
        {
            v = v+ae_fabs(a->ptr.pp_double[i][i], _state);
        }
        nrm = ae_maxreal(nrm, v, _state);
    }

    /*
     * Triangular estimator works directly on A - no factorization needed.
     */
    rcond_rmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state);
    result = v;
    return result;
}
54444 
54445 
54446 /*************************************************************************
54447 Condition number estimate of a Hermitian positive definite matrix.
54448 
54449 The algorithm calculates a lower bound of the condition number. In this case,
54450 the algorithm does not return a lower bound of the condition number, but an
54451 inverse number (to avoid an overflow in case of a singular matrix).
54452 
54453 It should be noted that 1-norm and inf-norm of condition numbers of symmetric
54454 matrices are equal, so the algorithm doesn't take into account the
54455 differences between these types of norms.
54456 
54457 Input parameters:
54458     A       -   Hermitian positive definite matrix which is given by its
54459                 upper or lower triangle depending on the value of
54460                 IsUpper. Array with elements [0..N-1, 0..N-1].
54461     N       -   size of matrix A.
54462     IsUpper -   storage format.
54463 
54464 Result:
54465     1/LowerBound(cond(A)), if matrix A is positive definite,
54466    -1, if matrix A is not positive definite, and its condition number
54467     could not be found by this algorithm.
54468 
54469 NOTE:
54470     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54471     0.0 is returned in such cases.
54472 *************************************************************************/
double hpdmatrixrcond(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double v;
    double nrm;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: Cholesky factorization destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    /*
     * Accumulate per-column absolute sums of the (implicitly Hermitian)
     * matrix while visiting only the stored triangle: each off-diagonal
     * element A[i][j] contributes to both column i and column j.
     */
    ae_vector_set_length(&t, n, _state);
    for(i=0; i<=n-1; i++)
    {
        t.ptr.p_double[i] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i;
        }
        for(j=j1; j<=j2; j++)
        {
            v = ae_c_abs(a->ptr.pp_complex[i][j], _state);
            t.ptr.p_double[i] = t.ptr.p_double[i]+v;
            if( i!=j )
            {
                t.ptr.p_double[j] = t.ptr.p_double[j]+v;
            }
        }
    }
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
    }

    /*
     * Cholesky-factorize the copy; failure means the matrix is not
     * positive definite, in which case -1 is returned.
     */
    if( hpdmatrixcholesky(a, n, isupper, _state) )
    {
        rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_true, nrm, &v, _state);
        result = v;
    }
    else
    {
        result = (double)(-1);
    }
    ae_frame_leave(_state);
    return result;
}
54543 
54544 
54545 /*************************************************************************
54546 Estimate of a matrix condition number (1-norm)
54547 
54548 The algorithm calculates a lower bound of the condition number. In this case,
54549 the algorithm does not return a lower bound of the condition number, but an
54550 inverse number (to avoid an overflow in case of a singular matrix).
54551 
54552 Input parameters:
54553     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54554     N   -   size of matrix A.
54555 
54556 Result: 1/LowerBound(cond(A))
54557 
54558 NOTE:
54559     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54560     0.0 is returned in such cases.
54561 *************************************************************************/
double cmatrixrcond1(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    ae_vector t;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    memset(&t, 0, sizeof(t));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
    ae_vector_init(&t, 0, DT_REAL, _state, ae_true);

    ae_assert(n>=1, "CMatrixRCond1: N<1!", _state);

    /*
     * ||A||_1 = max over columns j of sum_i |A[i][j]|.
     * T accumulates the per-column absolute sums.
     */
    ae_vector_set_length(&t, n, _state);
    for(j=0; j<=n-1; j++)
    {
        t.ptr.p_double[j] = (double)(0);
    }
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
        }
    }
    nrm = (double)(0);
    for(j=0; j<=n-1; j++)
    {
        if( ae_fp_greater(t.ptr.p_double[j],nrm) )
        {
            nrm = t.ptr.p_double[j];
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (1-norm flavor).
     */
    cmatrixlu(a, n, n, &pivots, _state);
    rcond_cmatrixrcondluinternal(a, n, ae_true, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54609 
54610 
54611 /*************************************************************************
54612 Estimate of a matrix condition number (infinity-norm).
54613 
54614 The algorithm calculates a lower bound of the condition number. In this case,
54615 the algorithm does not return a lower bound of the condition number, but an
54616 inverse number (to avoid an overflow in case of a singular matrix).
54617 
54618 Input parameters:
54619     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54620     N   -   size of matrix A.
54621 
54622 Result: 1/LowerBound(cond(A))
54623 
54624 NOTE:
54625     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54626     0.0 is returned in such cases.
54627 *************************************************************************/
double cmatrixrcondinf(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_int_t i;
    ae_int_t j;
    double v;
    double nrm;
    ae_vector pivots;
    double result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&pivots, 0, sizeof(pivots));
    /* Work on a private copy: LU factorization below destroys the matrix. */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);

    ae_assert(n>=1, "CMatrixRCondInf: N<1!", _state);

    /*
     * ||A||_inf = max over rows i of sum_j |A[i][j]|.
     * Row sums need no scratch vector - accumulate and fold in one pass.
     */
    nrm = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = (double)(0);
        for(j=0; j<=n-1; j++)
        {
            v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state);
        }
        if( ae_fp_greater(v,nrm) )
        {
            nrm = v;
        }
    }

    /*
     * LU-factorize the copy and hand the factors plus the norm to the
     * internal reciprocal-condition estimator (inf-norm flavor).
     */
    cmatrixlu(a, n, n, &pivots, _state);
    rcond_cmatrixrcondluinternal(a, n, ae_false, ae_true, nrm, &v, _state);
    result = v;
    ae_frame_leave(_state);
    return result;
}
54665 
54666 
54667 /*************************************************************************
54668 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
54669 
54670 The algorithm calculates a lower bound of the condition number. In this case,
54671 the algorithm does not return a lower bound of the condition number, but an
54672 inverse number (to avoid an overflow in case of a singular matrix).
54673 
54674 Input parameters:
54675     LUA         -   LU decomposition of a matrix in compact form. Output of
54676                     the RMatrixLU subroutine.
54677     N           -   size of matrix A.
54678 
54679 Result: 1/LowerBound(cond(A))
54680 
54681 NOTE:
54682     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54683     0.0 is returned in such cases.
54684 *************************************************************************/
double rmatrixlurcond1(/* Real    */ ae_matrix* lua,
     ae_int_t n,
     ae_state *_state)
{
    double rc;

    /*
     * LUA is already factorized, so the estimator is invoked directly with
     * the "norm not provided" flag (ae_false) and a dummy norm of zero;
     * the 1-norm flavor is selected by the ae_true argument.
     */
    rc = (double)(0);
    rcond_rmatrixrcondluinternal(lua, n, ae_true, ae_false, (double)(0), &rc, _state);
    return rc;
}
54697 
54698 
54699 /*************************************************************************
54700 Estimate of the condition number of a matrix given by its LU decomposition
54701 (infinity norm).
54702 
54703 The algorithm calculates a lower bound of the condition number. In this case,
54704 the algorithm does not return a lower bound of the condition number, but an
54705 inverse number (to avoid an overflow in case of a singular matrix).
54706 
54707 Input parameters:
54708     LUA     -   LU decomposition of a matrix in compact form. Output of
54709                 the RMatrixLU subroutine.
54710     N       -   size of matrix A.
54711 
54712 Result: 1/LowerBound(cond(A))
54713 
54714 NOTE:
54715     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54716     0.0 is returned in such cases.
54717 *************************************************************************/
rmatrixlurcondinf(ae_matrix * lua,ae_int_t n,ae_state * _state)54718 double rmatrixlurcondinf(/* Real    */ ae_matrix* lua,
54719      ae_int_t n,
54720      ae_state *_state)
54721 {
54722     double v;
54723     double result;
54724 
54725 
54726     rcond_rmatrixrcondluinternal(lua, n, ae_false, ae_false, (double)(0), &v, _state);
54727     result = v;
54728     return result;
54729 }
54730 
54731 
54732 /*************************************************************************
54733 Condition number estimate of a symmetric positive definite matrix given by
54734 Cholesky decomposition.
54735 
54736 The algorithm calculates a lower bound of the condition number. In this
54737 case, the algorithm does not return a lower bound of the condition number,
54738 but an inverse number (to avoid an overflow in case of a singular matrix).
54739 
54740 It should be noted that 1-norm and inf-norm condition numbers of symmetric
54741 matrices are equal, so the algorithm doesn't take into account the
54742 differences between these types of norms.
54743 
54744 Input parameters:
54745     CD  - Cholesky decomposition of matrix A,
54746           output of SMatrixCholesky subroutine.
54747     N   - size of matrix A.
54748 
54749 Result: 1/LowerBound(cond(A))
54750 
54751 NOTE:
54752     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54753     0.0 is returned in such cases.
54754 *************************************************************************/
spdmatrixcholeskyrcond(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)54755 double spdmatrixcholeskyrcond(/* Real    */ ae_matrix* a,
54756      ae_int_t n,
54757      ae_bool isupper,
54758      ae_state *_state)
54759 {
54760     double v;
54761     double result;
54762 
54763 
54764     rcond_spdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, (double)(0), &v, _state);
54765     result = v;
54766     return result;
54767 }
54768 
54769 
54770 /*************************************************************************
54771 Condition number estimate of a Hermitian positive definite matrix given by
54772 Cholesky decomposition.
54773 
54774 The algorithm calculates a lower bound of the condition number. In this
54775 case, the algorithm does not return a lower bound of the condition number,
54776 but an inverse number (to avoid an overflow in case of a singular matrix).
54777 
54778 It should be noted that 1-norm and inf-norm condition numbers of symmetric
54779 matrices are equal, so the algorithm doesn't take into account the
54780 differences between these types of norms.
54781 
54782 Input parameters:
54783     CD  - Cholesky decomposition of matrix A,
54784           output of SMatrixCholesky subroutine.
54785     N   - size of matrix A.
54786 
54787 Result: 1/LowerBound(cond(A))
54788 
54789 NOTE:
54790     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54791     0.0 is returned in such cases.
54792 *************************************************************************/
hpdmatrixcholeskyrcond(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)54793 double hpdmatrixcholeskyrcond(/* Complex */ ae_matrix* a,
54794      ae_int_t n,
54795      ae_bool isupper,
54796      ae_state *_state)
54797 {
54798     double v;
54799     double result;
54800 
54801 
54802     rcond_hpdmatrixrcondcholeskyinternal(a, n, isupper, ae_false, (double)(0), &v, _state);
54803     result = v;
54804     return result;
54805 }
54806 
54807 
54808 /*************************************************************************
54809 Estimate of the condition number of a matrix given by its LU decomposition (1-norm)
54810 
54811 The algorithm calculates a lower bound of the condition number. In this case,
54812 the algorithm does not return a lower bound of the condition number, but an
54813 inverse number (to avoid an overflow in case of a singular matrix).
54814 
54815 Input parameters:
54816     LUA         -   LU decomposition of a matrix in compact form. Output of
54817                     the CMatrixLU subroutine.
54818     N           -   size of matrix A.
54819 
54820 Result: 1/LowerBound(cond(A))
54821 
54822 NOTE:
54823     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54824     0.0 is returned in such cases.
54825 *************************************************************************/
cmatrixlurcond1(ae_matrix * lua,ae_int_t n,ae_state * _state)54826 double cmatrixlurcond1(/* Complex */ ae_matrix* lua,
54827      ae_int_t n,
54828      ae_state *_state)
54829 {
54830     double v;
54831     double result;
54832 
54833 
54834     ae_assert(n>=1, "CMatrixLURCond1: N<1!", _state);
54835     rcond_cmatrixrcondluinternal(lua, n, ae_true, ae_false, 0.0, &v, _state);
54836     result = v;
54837     return result;
54838 }
54839 
54840 
54841 /*************************************************************************
54842 Estimate of the condition number of a matrix given by its LU decomposition
54843 (infinity norm).
54844 
54845 The algorithm calculates a lower bound of the condition number. In this case,
54846 the algorithm does not return a lower bound of the condition number, but an
54847 inverse number (to avoid an overflow in case of a singular matrix).
54848 
54849 Input parameters:
54850     LUA     -   LU decomposition of a matrix in compact form. Output of
54851                 the CMatrixLU subroutine.
54852     N       -   size of matrix A.
54853 
54854 Result: 1/LowerBound(cond(A))
54855 
54856 NOTE:
54857     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54858     0.0 is returned in such cases.
54859 *************************************************************************/
cmatrixlurcondinf(ae_matrix * lua,ae_int_t n,ae_state * _state)54860 double cmatrixlurcondinf(/* Complex */ ae_matrix* lua,
54861      ae_int_t n,
54862      ae_state *_state)
54863 {
54864     double v;
54865     double result;
54866 
54867 
54868     ae_assert(n>=1, "CMatrixLURCondInf: N<1!", _state);
54869     rcond_cmatrixrcondluinternal(lua, n, ae_false, ae_false, 0.0, &v, _state);
54870     result = v;
54871     return result;
54872 }
54873 
54874 
54875 /*************************************************************************
54876 Triangular matrix: estimate of a condition number (1-norm)
54877 
54878 The algorithm calculates a lower bound of the condition number. In this case,
54879 the algorithm does not return a lower bound of the condition number, but an
54880 inverse number (to avoid an overflow in case of a singular matrix).
54881 
54882 Input parameters:
54883     A       -   matrix. Array[0..N-1, 0..N-1].
54884     N       -   size of A.
54885     IsUpper -   True, if the matrix is upper triangular.
54886     IsUnit  -   True, if the matrix has a unit diagonal.
54887 
54888 Result: 1/LowerBound(cond(A))
54889 
54890 NOTE:
54891     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54892     0.0 is returned in such cases.
54893 *************************************************************************/
cmatrixtrrcond1(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_state * _state)54894 double cmatrixtrrcond1(/* Complex */ ae_matrix* a,
54895      ae_int_t n,
54896      ae_bool isupper,
54897      ae_bool isunit,
54898      ae_state *_state)
54899 {
54900     ae_frame _frame_block;
54901     ae_int_t i;
54902     ae_int_t j;
54903     double v;
54904     double nrm;
54905     ae_vector pivots;
54906     ae_vector t;
54907     ae_int_t j1;
54908     ae_int_t j2;
54909     double result;
54910 
54911     ae_frame_make(_state, &_frame_block);
54912     memset(&pivots, 0, sizeof(pivots));
54913     memset(&t, 0, sizeof(t));
54914     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
54915     ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
54916 
54917     ae_assert(n>=1, "RMatrixTRRCond1: N<1!", _state);
54918     ae_vector_set_length(&t, n, _state);
54919     for(i=0; i<=n-1; i++)
54920     {
54921         t.ptr.p_double[i] = (double)(0);
54922     }
54923     for(i=0; i<=n-1; i++)
54924     {
54925         if( isupper )
54926         {
54927             j1 = i+1;
54928             j2 = n-1;
54929         }
54930         else
54931         {
54932             j1 = 0;
54933             j2 = i-1;
54934         }
54935         for(j=j1; j<=j2; j++)
54936         {
54937             t.ptr.p_double[j] = t.ptr.p_double[j]+ae_c_abs(a->ptr.pp_complex[i][j], _state);
54938         }
54939         if( isunit )
54940         {
54941             t.ptr.p_double[i] = t.ptr.p_double[i]+1;
54942         }
54943         else
54944         {
54945             t.ptr.p_double[i] = t.ptr.p_double[i]+ae_c_abs(a->ptr.pp_complex[i][i], _state);
54946         }
54947     }
54948     nrm = (double)(0);
54949     for(i=0; i<=n-1; i++)
54950     {
54951         nrm = ae_maxreal(nrm, t.ptr.p_double[i], _state);
54952     }
54953     rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_true, nrm, &v, _state);
54954     result = v;
54955     ae_frame_leave(_state);
54956     return result;
54957 }
54958 
54959 
54960 /*************************************************************************
54961 Triangular matrix: estimate of a matrix condition number (infinity-norm).
54962 
54963 The algorithm calculates a lower bound of the condition number. In this case,
54964 the algorithm does not return a lower bound of the condition number, but an
54965 inverse number (to avoid an overflow in case of a singular matrix).
54966 
54967 Input parameters:
54968     A   -   matrix. Array whose indexes range within [0..N-1, 0..N-1].
54969     N   -   size of matrix A.
54970     IsUpper -   True, if the matrix is upper triangular.
54971     IsUnit  -   True, if the matrix has a unit diagonal.
54972 
54973 Result: 1/LowerBound(cond(A))
54974 
54975 NOTE:
54976     if k(A) is very large, then matrix is  assumed  degenerate,  k(A)=INF,
54977     0.0 is returned in such cases.
54978 *************************************************************************/
cmatrixtrrcondinf(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_state * _state)54979 double cmatrixtrrcondinf(/* Complex */ ae_matrix* a,
54980      ae_int_t n,
54981      ae_bool isupper,
54982      ae_bool isunit,
54983      ae_state *_state)
54984 {
54985     ae_frame _frame_block;
54986     ae_int_t i;
54987     ae_int_t j;
54988     double v;
54989     double nrm;
54990     ae_vector pivots;
54991     ae_int_t j1;
54992     ae_int_t j2;
54993     double result;
54994 
54995     ae_frame_make(_state, &_frame_block);
54996     memset(&pivots, 0, sizeof(pivots));
54997     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
54998 
54999     ae_assert(n>=1, "RMatrixTRRCondInf: N<1!", _state);
55000     nrm = (double)(0);
55001     for(i=0; i<=n-1; i++)
55002     {
55003         if( isupper )
55004         {
55005             j1 = i+1;
55006             j2 = n-1;
55007         }
55008         else
55009         {
55010             j1 = 0;
55011             j2 = i-1;
55012         }
55013         v = (double)(0);
55014         for(j=j1; j<=j2; j++)
55015         {
55016             v = v+ae_c_abs(a->ptr.pp_complex[i][j], _state);
55017         }
55018         if( isunit )
55019         {
55020             v = v+1;
55021         }
55022         else
55023         {
55024             v = v+ae_c_abs(a->ptr.pp_complex[i][i], _state);
55025         }
55026         nrm = ae_maxreal(nrm, v, _state);
55027     }
55028     rcond_cmatrixrcondtrinternal(a, n, isupper, isunit, ae_false, nrm, &v, _state);
55029     result = v;
55030     ae_frame_leave(_state);
55031     return result;
55032 }
55033 
55034 
55035 /*************************************************************************
55036 Threshold for rcond: matrices with condition number beyond this  threshold
55037 are considered singular.
55038 
55039 Threshold must be far enough from underflow, at least Sqr(Threshold)  must
55040 be greater than underflow.
55041 *************************************************************************/
rcondthreshold(ae_state * _state)55042 double rcondthreshold(ae_state *_state)
55043 {
55044     double result;
55045 
55046 
55047     result = ae_sqrt(ae_sqrt(ae_minrealnumber, _state), _state);
55048     return result;
55049 }
55050 
55051 
55052 /*************************************************************************
55053 Internal subroutine for condition number estimation
55054 
55055   -- LAPACK routine (version 3.0) --
55056      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55057      Courant Institute, Argonne National Lab, and Rice University
55058      February 29, 1992
55059 *************************************************************************/
static void rcond_rmatrixrcondtrinternal(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_bool onenorm,
     double anorm,
     double* rc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector ex;       /* 1-based workspace vector exchanged with the norm estimator */
    ae_vector ev;       /* auxiliary workspace for rcond_rmatrixestimatenorm */
    ae_vector iwork;    /* integer workspace for rcond_rmatrixestimatenorm */
    ae_vector tmp;
    ae_int_t i;
    ae_int_t j;
    ae_int_t kase;      /* reverse-communication request code; 0 means "done" */
    ae_int_t kase1;     /* request code that means "multiply by inv(A)" (1-norm vs inf-norm) */
    ae_int_t j1;
    ae_int_t j2;
    double ainvnm;      /* estimated norm of inv(A) */
    double maxgrowth;   /* growth limit passed to the scaled triangular solver */
    double s;           /* scaling: 1/max|A[i][j]| over the stored triangle */

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&iwork, 0, sizeof(iwork));
    memset(&tmp, 0, sizeof(tmp));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);


    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * init
     */
    if( onenorm )
    {
        kase1 = 1;
    }
    else
    {
        kase1 = 2;
    }
    ae_vector_set_length(&iwork, n+1, _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * prepare parameters for triangular solver
     */
    maxgrowth = 1/rcondthreshold(_state);
    s = (double)(0);
    /* S = max absolute element of the stored triangle (diagonal counted
       as 1 when IsUnit); used to scale A so the solver works near unity. */
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        for(j=j1; j<=j2; j++)
        {
            s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
        }
        if( isunit )
        {
            s = ae_maxreal(s, (double)(1), _state);
        }
        else
        {
            s = ae_maxreal(s, ae_fabs(a->ptr.pp_double[i][i], _state), _state);
        }
    }
    /* guard against an all-zero matrix: avoid division by zero below */
    if( ae_fp_eq(s,(double)(0)) )
    {
        s = (double)(1);
    }
    s = 1/s;

    /*
     * Scale according to S
     */
    anorm = anorm*s;

    /*
     * Quick return if possible
     * We assume that ANORM<>0 after this block
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    kase = 0;
    /* Reverse-communication loop: the estimator hands back a vector in EX
       and a request code in KASE; we apply inv(A) or inv(A') and resume. */
    for(;;)
    {
        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
        if( kase==0 )
        {
            break;
        }

        /*
         * from 1-based array to 0-based
         */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
        }

        /*
         * multiply by inv(A) or inv(A')
         */
        if( kase==kase1 )
        {

            /*
             * multiply by inv(A)
             */
            /* TransA=0; failure means growth exceeded MaxGrowth => treat as singular (RC stays 0) */
            if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * multiply by inv(A')
             */
            /* TransA=1 (transposed solve) */
            if( !rmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 1, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }

        /*
         * from 0-based array to 1-based
         */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        /* below the threshold the matrix is declared numerically singular */
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55244 
55245 
55246 /*************************************************************************
55247 Condition number estimation
55248 
55249   -- LAPACK routine (version 3.0) --
55250      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55251      Courant Institute, Argonne National Lab, and Rice University
55252      March 31, 1993
55253 *************************************************************************/
static void rcond_cmatrixrcondtrinternal(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_bool onenorm,
     double anorm,
     double* rc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector ex;       /* 1-based workspace exchanged with the norm estimator */
    ae_vector cwork2;
    ae_vector cwork3;
    ae_vector cwork4;
    ae_vector isave;    /* integer state saved across estimator calls */
    ae_vector rsave;    /* real state saved across estimator calls */
    ae_int_t kase;      /* reverse-communication request code; 0 means "done" */
    ae_int_t kase1;     /* request code meaning "multiply by inv(A)" */
    double ainvnm;      /* estimated norm of inv(A) */
    ae_int_t i;
    ae_int_t j;
    ae_int_t j1;
    ae_int_t j2;
    double s;           /* scaling: 1/max|A[i][j]| over the stored triangle */
    double maxgrowth;   /* growth limit for the scaled triangular solver */

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&cwork2, 0, sizeof(cwork2));
    memset(&cwork3, 0, sizeof(cwork3));
    memset(&cwork4, 0, sizeof(cwork4));
    memset(&isave, 0, sizeof(isave));
    memset(&rsave, 0, sizeof(rsave));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);


    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * init
     */
    if( n<=0 )
    {
        ae_frame_leave(_state);
        return;
    }
    /* NOTE(review): this branch is unreachable — n<=0 above already covers
       n==0, so the n==0 case returns RC=0, not RC=1. Left as-is to preserve
       behavior; callers assert N>=1 anyway. */
    if( n==0 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }
    ae_vector_set_length(&cwork2, n+1, _state);

    /*
     * prepare parameters for triangular solver
     */
    maxgrowth = 1/rcondthreshold(_state);
    s = (double)(0);
    /* S = max absolute element of the stored triangle (diagonal counted
       as 1 when IsUnit); used to scale A towards unity. */
    for(i=0; i<=n-1; i++)
    {
        if( isupper )
        {
            j1 = i+1;
            j2 = n-1;
        }
        else
        {
            j1 = 0;
            j2 = i-1;
        }
        for(j=j1; j<=j2; j++)
        {
            s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][j], _state), _state);
        }
        if( isunit )
        {
            s = ae_maxreal(s, (double)(1), _state);
        }
        else
        {
            s = ae_maxreal(s, ae_c_abs(a->ptr.pp_complex[i][i], _state), _state);
        }
    }
    /* guard against an all-zero matrix: avoid division by zero below */
    if( ae_fp_eq(s,(double)(0)) )
    {
        s = (double)(1);
    }
    s = 1/s;

    /*
     * Scale according to S
     */
    anorm = anorm*s;

    /*
     * Quick return if possible
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    if( onenorm )
    {
        kase1 = 1;
    }
    else
    {
        kase1 = 2;
    }
    kase = 0;
    /* Reverse-communication loop: apply inv(A) or inv(A') to the vector
       returned in EX, then resume the estimator. */
    for(;;)
    {
        rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state);
        if( kase==0 )
        {
            break;
        }

        /*
         * From 1-based to 0-based
         */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
        }

        /*
         * multiply by inv(A) or inv(A')
         */
        if( kase==kase1 )
        {

            /*
             * multiply by inv(A)
             */
            /* TransA=0; failure => growth exceeded MaxGrowth => singular, RC stays 0 */
            if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 0, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * multiply by inv(A')
             */
            /* TransA=2 — presumably the conjugate-transpose solve mode;
               confirm against cmatrixscaledtrsafesolve's declaration */
            if( !cmatrixscaledtrsafesolve(a, s, n, &ex, isupper, 2, isunit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }

        /*
         * from 0-based to 1-based
         */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        /* below the threshold the matrix is declared numerically singular */
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55447 
55448 
55449 /*************************************************************************
55450 Internal subroutine for condition number estimation
55451 
55452   -- LAPACK routine (version 3.0) --
55453      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55454      Courant Institute, Argonne National Lab, and Rice University
55455      February 29, 1992
55456 *************************************************************************/
static void rcond_spdmatrixrcondcholeskyinternal(/* Real    */ ae_matrix* cha,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isnormprovided,
     double anorm,
     double* rc,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_int_t kase;      /* reverse-communication request code; 0 means "done" */
    double ainvnm;      /* estimated norm of inv(A) */
    ae_vector ex;       /* 1-based workspace exchanged with the norm estimator */
    ae_vector ev;
    ae_vector tmp;
    ae_vector iwork;
    double sa;          /* scaling: 1/max|CHA[i][j]| over the stored triangle */
    double v;
    double maxgrowth;   /* growth limit for the scaled triangular solver */

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&tmp, 0, sizeof(tmp));
    memset(&iwork, 0, sizeof(iwork));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);

    ae_assert(n>=1, "Assertion failed", _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * prepare parameters for triangular solver
     */
    maxgrowth = 1/rcondthreshold(_state);
    sa = (double)(0);
    /* SA = max absolute element of the stored triangle of the Cholesky
       factor (real values wrapped into complex purely to reuse ae_c_abs) */
    if( isupper )
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=i; j<=n-1; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state);
            }
        }
    }
    else
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=i; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(ae_complex_from_d(cha->ptr.pp_double[i][j]), _state), _state);
            }
        }
    }
    /* guard against an all-zero factor: avoid division by zero below */
    if( ae_fp_eq(sa,(double)(0)) )
    {
        sa = (double)(1);
    }
    sa = 1/sa;

    /*
     * Estimate the norm of A.
     */
    if( !isnormprovided )
    {
        /* A = U'*U (or L*L'), so ||A|| is estimated by multiplying the
           estimator's vector by both Cholesky factors in turn. EX is
           1-based; all products are scaled by SA. */
        kase = 0;
        anorm = (double)(0);
        for(;;)
        {
            rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state);
            if( kase==0 )
            {
                break;
            }
            if( isupper )
            {

                /*
                 * Multiply by U
                 */
                for(i=1; i<=n; i++)
                {
                    v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
                    ex.ptr.p_double[i] = v;
                }
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by U'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    ae_v_addd(&tmp.ptr.p_double[i], 1, &cha->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
            }
            else
            {

                /*
                 * Multiply by L'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    ae_v_addd(&tmp.ptr.p_double[0], 1, &cha->ptr.pp_double[i][0], 1, ae_v_len(0,i), v);
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by L
                 */
                for(i=n; i>=1; i--)
                {
                    v = ae_v_dotproduct(&cha->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-1));
                    ex.ptr.p_double[i] = v;
                }
                ae_v_muld(&ex.ptr.p_double[1], 1, ae_v_len(1,n), sa);
            }
        }
    }

    /*
     * Quick return if possible
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the 1-norm of inv(A).
     */
    /* inv(A) = inv(U)*inv(U') (or inv(L')*inv(L)): apply both inverse
       factors via the scaled safe triangular solver. */
    kase = 0;
    for(;;)
    {
        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
        if( kase==0 )
        {
            break;
        }
        /* shift EX from 1-based to 0-based for the solver */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
        }
        if( isupper )
        {

            /*
             * Multiply by inv(U').
             */
            /* solver failure => growth exceeded MaxGrowth => singular, RC stays 0 */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(U).
             */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * Multiply by inv(L).
             */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(L').
             */
            if( !rmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 1, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        /* shift EX back from 0-based to 1-based for the estimator */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        v = 1/ainvnm;
        *rc = v/anorm;
        /* below the threshold the matrix is declared numerically singular */
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55693 
55694 
55695 /*************************************************************************
55696 Internal subroutine for condition number estimation
55697 
55698   -- LAPACK routine (version 3.0) --
55699      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55700      Courant Institute, Argonne National Lab, and Rice University
55701      February 29, 1992
55702 *************************************************************************/
static void rcond_hpdmatrixrcondcholeskyinternal(/* Complex */ ae_matrix* cha,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isnormprovided,
     double anorm,
     double* rc,
     ae_state *_state)
{
    /*
     * Estimates the reciprocal condition number of a Hermitian positive
     * definite matrix A given by its Cholesky factor CHA.
     *
     * Inputs:
     *     CHA            -   Cholesky factor; only the triangle selected by
     *                        IsUpper is referenced (U for A=U'*U, L for A=L*L')
     *     N              -   matrix size, must be >=1 (asserted below)
     *     IsUpper        -   which triangle of CHA is stored
     *     IsNormProvided -   if true, ANorm already holds the norm of A and
     *                        the estimation pass below is skipped
     *     ANorm          -   norm of A (used only when IsNormProvided)
     *
     * Output:
     *     RC             -   reciprocal condition number estimate; left at 0
     *                        when ANorm==0, when a scaled triangular solve
     *                        fails, or when the estimate falls below
     *                        rcondthreshold().
     */
    ae_frame _frame_block;
    ae_vector isave;       /* integer state of the iterative norm estimator */
    ae_vector rsave;       /* real state of the iterative norm estimator */
    ae_vector ex;          /* exchange vector for reverse communication (1-based) */
    ae_vector ev;          /* work vector of the norm estimator */
    ae_vector tmp;         /* scratch for the U'/L' multiplications (0-based) */
    ae_int_t kase;
    double ainvnm;
    ae_complex v;
    ae_int_t i;
    ae_int_t j;
    double sa;
    double maxgrowth;

    ae_frame_make(_state, &_frame_block);
    memset(&isave, 0, sizeof(isave));
    memset(&rsave, 0, sizeof(rsave));
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&tmp, 0, sizeof(tmp));
    *rc = 0;
    ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
    ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&ev, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);

    ae_assert(n>=1, "Assertion failed", _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * prepare parameters for triangular solver:
     * MaxGrowth bounds the solution growth allowed by the scaled solver;
     * SA = 1/max|CHA[i][j]| over the stored triangle, so SA*CHA has
     * unit-scale entries (SA falls back to 1 for an all-zero factor).
     */
    maxgrowth = 1/rcondthreshold(_state);
    sa = (double)(0);
    if( isupper )
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=i; j<=n-1; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state);
            }
        }
    }
    else
    {
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=i; j++)
            {
                sa = ae_maxreal(sa, ae_c_abs(cha->ptr.pp_complex[i][j], _state), _state);
            }
        }
    }
    if( ae_fp_eq(sa,(double)(0)) )
    {
        sa = (double)(1);
    }
    sa = 1/sa;

    /*
     * Estimate the norm of A
     */
    if( !isnormprovided )
    {
        anorm = (double)(0);
        kase = 0;
        for(;;)
        {
            /*
             * Reverse communication: while KASE!=0 the estimator asks us to
             * overwrite EX (1-based) with a matrix-vector product.  Since A
             * is Hermitian the same product A*EX serves every request, so no
             * branching on the KASE value is needed here.
             */
            rcond_cmatrixestimatenorm(n, &ev, &ex, &anorm, &kase, &isave, &rsave, _state);
            if( kase==0 )
            {
                break;
            }
            if( isupper )
            {

                /*
                 * Multiply by U
                 */
                for(i=1; i<=n; i++)
                {
                    /* row i-1 of U dotted with EX[i..n] (both tails start at the diagonal) */
                    v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1));
                    ex.ptr.p_complex[i] = v;
                }
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by U'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_complex[i] = ae_complex_from_i(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    /* accumulate EX[i+1]*conj(U[i][i..n-1]) into TMP[i..n-1] */
                    v = ex.ptr.p_complex[i+1];
                    ae_v_caddc(&tmp.ptr.p_complex[i], 1, &cha->ptr.pp_complex[i][i], 1, "Conj", ae_v_len(i,n-1), v);
                }
                ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n));
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
            }
            else
            {

                /*
                 * Multiply by L'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_complex[i] = ae_complex_from_i(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    /* accumulate EX[i+1]*conj(L[i][0..i]) into TMP[0..i] */
                    v = ex.ptr.p_complex[i+1];
                    ae_v_caddc(&tmp.ptr.p_complex[0], 1, &cha->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i), v);
                }
                ae_v_cmove(&ex.ptr.p_complex[1], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(1,n));
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);

                /*
                 * Multiply by L
                 */
                for(i=n; i>=1; i--)
                {
                    v = ae_v_cdotproduct(&cha->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-1));
                    ex.ptr.p_complex[i] = v;
                }
                ae_v_cmuld(&ex.ptr.p_complex[1], 1, ae_v_len(1,n), sa);
            }
        }
    }

    /*
     * Quick return if possible
     * After this block we assume that ANORM<>0
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    kase = 0;
    for(;;)
    {
        rcond_cmatrixestimatenorm(n, &ev, &ex, &ainvnm, &kase, &isave, &rsave, _state);
        if( kase==0 )
        {
            break;
        }
        /* estimator works with 1-based EX; shift to 0-based for the solver */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
        }
        if( isupper )
        {

            /*
             * Multiply by inv(U').
             * (trans-code 2 requests the conjugate-transpose solve; any
             * failure of the growth-bounded solver leaves RC=0)
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(U).
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * Multiply by inv(L).
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 0, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(L').
             */
            if( !cmatrixscaledtrsafesolve(cha, sa, n, &ex, isupper, 2, ae_false, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        /* shift back to the estimator's 1-based indexing */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
55944 
55945 
55946 /*************************************************************************
55947 Internal subroutine for condition number estimation
55948 
55949   -- LAPACK routine (version 3.0) --
55950      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
55951      Courant Institute, Argonne National Lab, and Rice University
55952      February 29, 1992
55953 *************************************************************************/
static void rcond_rmatrixrcondluinternal(/* Real    */ ae_matrix* lua,
     ae_int_t n,
     ae_bool onenorm,
     ae_bool isanormprovided,
     double anorm,
     double* rc,
     ae_state *_state)
{
    /*
     * Estimates the reciprocal condition number of a real general matrix A
     * given by its LU decomposition stored compactly in LUA (unit lower
     * triangular L below the diagonal, upper triangular U on and above it).
     *
     * Inputs:
     *     LUA             -   combined LU factors of A
     *     N               -   matrix size
     *     OneNorm         -   true: use the 1-norm, false: use the inf-norm
     *     IsANormProvided -   if true, ANorm already holds the norm of A
     *     ANorm           -   norm of A (used only when IsANormProvided)
     *
     * Output:
     *     RC              -   reciprocal condition number estimate; left at 0
     *                         when ANorm==0, when a scaled triangular solve
     *                         fails, or when the estimate falls below
     *                         rcondthreshold().
     */
    ae_frame _frame_block;
    ae_vector ex;          /* exchange vector for reverse communication (1-based) */
    ae_vector ev;          /* work vector of the norm estimator */
    ae_vector iwork;       /* integer work area of the norm estimator */
    ae_vector tmp;         /* scratch for the transposed multiplications (0-based) */
    double v;
    ae_int_t i;
    ae_int_t j;
    ae_int_t kase;
    ae_int_t kase1;        /* KASE value that requests A*x (the other requests A'*x) */
    double ainvnm;
    double maxgrowth;
    double su;
    double sl;
    ae_bool mupper;
    ae_bool munit;

    ae_frame_make(_state, &_frame_block);
    memset(&ex, 0, sizeof(ex));
    memset(&ev, 0, sizeof(ev));
    memset(&iwork, 0, sizeof(iwork));
    memset(&tmp, 0, sizeof(tmp));
    *rc = 0;
    ae_vector_init(&ex, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&ev, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&iwork, 0, DT_INT, _state, ae_true);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);


    /*
     * RC=0 if something happens
     */
    *rc = (double)(0);

    /*
     * init
     */
    if( onenorm )
    {
        kase1 = 1;
    }
    else
    {
        kase1 = 2;
    }
    /* MUpper/MUnit describe the U and L factors for the solver calls below:
     * U is upper non-unit (mupper, !munit), L is lower unit (!mupper, munit) */
    mupper = ae_true;
    munit = ae_true;
    ae_vector_set_length(&iwork, n+1, _state);
    ae_vector_set_length(&tmp, n, _state);

    /*
     * prepare parameters for triangular solver:
     * SU = 1/max|U entries|, SL = 1/max(1, |strict L entries|); SL starts
     * at 1 because L has an implicit unit diagonal.
     */
    maxgrowth = 1/rcondthreshold(_state);
    su = (double)(0);
    sl = (double)(1);
    for(i=0; i<=n-1; i++)
    {
        for(j=0; j<=i-1; j++)
        {
            sl = ae_maxreal(sl, ae_fabs(lua->ptr.pp_double[i][j], _state), _state);
        }
        for(j=i; j<=n-1; j++)
        {
            su = ae_maxreal(su, ae_fabs(lua->ptr.pp_double[i][j], _state), _state);
        }
    }
    if( ae_fp_eq(su,(double)(0)) )
    {
        su = (double)(1);
    }
    su = 1/su;
    sl = 1/sl;

    /*
     * Estimate the norm of A.
     */
    if( !isanormprovided )
    {
        kase = 0;
        anorm = (double)(0);
        for(;;)
        {
            /* reverse communication: KASE==KASE1 requests EX <- A*EX,
             * otherwise EX <- A'*EX; KASE==0 means the estimate is ready */
            rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &anorm, &kase, _state);
            if( kase==0 )
            {
                break;
            }
            if( kase==kase1 )
            {

                /*
                 * Multiply by U
                 */
                for(i=1; i<=n; i++)
                {
                    v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][i-1], 1, &ex.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
                    ex.ptr.p_double[i] = v;
                }

                /*
                 * Multiply by L
                 * (unit diagonal: EX[i] keeps its own term, the strict
                 * lower row contributes the dot product)
                 */
                for(i=n; i>=1; i--)
                {
                    if( i>1 )
                    {
                        v = ae_v_dotproduct(&lua->ptr.pp_double[i-1][0], 1, &ex.ptr.p_double[1], 1, ae_v_len(0,i-2));
                    }
                    else
                    {
                        v = (double)(0);
                    }
                    ex.ptr.p_double[i] = ex.ptr.p_double[i]+v;
                }
            }
            else
            {

                /*
                 * Multiply by L'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    if( i>=1 )
                    {
                        /* strict lower row i spread over TMP[0..i-1] */
                        ae_v_addd(&tmp.ptr.p_double[0], 1, &lua->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), v);
                    }
                    /* implicit unit diagonal of L */
                    tmp.ptr.p_double[i] = tmp.ptr.p_double[i]+v;
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));

                /*
                 * Multiply by U'
                 */
                for(i=0; i<=n-1; i++)
                {
                    tmp.ptr.p_double[i] = (double)(0);
                }
                for(i=0; i<=n-1; i++)
                {
                    v = ex.ptr.p_double[i+1];
                    ae_v_addd(&tmp.ptr.p_double[i], 1, &lua->ptr.pp_double[i][i], 1, ae_v_len(i,n-1), v);
                }
                ae_v_move(&ex.ptr.p_double[1], 1, &tmp.ptr.p_double[0], 1, ae_v_len(1,n));
            }
        }
    }

    /*
     * Scale according to SU/SL
     */
    anorm = anorm*su*sl;

    /*
     * Quick return if possible
     * We assume that ANORM<>0 after this block
     */
    if( ae_fp_eq(anorm,(double)(0)) )
    {
        ae_frame_leave(_state);
        return;
    }
    if( n==1 )
    {
        *rc = (double)(1);
        ae_frame_leave(_state);
        return;
    }

    /*
     * Estimate the norm of inv(A).
     */
    ainvnm = (double)(0);
    kase = 0;
    for(;;)
    {
        rcond_rmatrixestimatenorm(n, &ev, &ex, &iwork, &ainvnm, &kase, _state);
        if( kase==0 )
        {
            break;
        }

        /*
         * from 1-based array to 0-based
         */
        for(i=0; i<=n-1; i++)
        {
            ex.ptr.p_double[i] = ex.ptr.p_double[i+1];
        }

        /*
         * multiply by inv(A) or inv(A')
         * (any failure of the growth-bounded solver leaves RC=0)
         */
        if( kase==kase1 )
        {

            /*
             * Multiply by inv(L).
             */
            if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 0, munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(U).
             */
            if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 0, !munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }
        else
        {

            /*
             * Multiply by inv(U').
             */
            if( !rmatrixscaledtrsafesolve(lua, su, n, &ex, mupper, 1, !munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }

            /*
             * Multiply by inv(L').
             */
            if( !rmatrixscaledtrsafesolve(lua, sl, n, &ex, !mupper, 1, munit, maxgrowth, _state) )
            {
                ae_frame_leave(_state);
                return;
            }
        }

        /*
         * from 0-based array to 1-based
         */
        for(i=n-1; i>=0; i--)
        {
            ex.ptr.p_double[i+1] = ex.ptr.p_double[i];
        }
    }

    /*
     * Compute the estimate of the reciprocal condition number.
     */
    if( ae_fp_neq(ainvnm,(double)(0)) )
    {
        *rc = 1/ainvnm;
        *rc = *rc/anorm;
        if( ae_fp_less(*rc,rcondthreshold(_state)) )
        {
            *rc = (double)(0);
        }
    }
    ae_frame_leave(_state);
}
56227 
56228 
56229 /*************************************************************************
56230 Condition number estimation
56231 
56232   -- LAPACK routine (version 3.0) --
56233      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
56234      Courant Institute, Argonne National Lab, and Rice University
56235      March 31, 1993
56236 *************************************************************************/
rcond_cmatrixrcondluinternal(ae_matrix * lua,ae_int_t n,ae_bool onenorm,ae_bool isanormprovided,double anorm,double * rc,ae_state * _state)56237 static void rcond_cmatrixrcondluinternal(/* Complex */ ae_matrix* lua,
56238      ae_int_t n,
56239      ae_bool onenorm,
56240      ae_bool isanormprovided,
56241      double anorm,
56242      double* rc,
56243      ae_state *_state)
56244 {
56245     ae_frame _frame_block;
56246     ae_vector ex;
56247     ae_vector cwork2;
56248     ae_vector cwork3;
56249     ae_vector cwork4;
56250     ae_vector isave;
56251     ae_vector rsave;
56252     ae_int_t kase;
56253     ae_int_t kase1;
56254     double ainvnm;
56255     ae_complex v;
56256     ae_int_t i;
56257     ae_int_t j;
56258     double su;
56259     double sl;
56260     double maxgrowth;
56261 
56262     ae_frame_make(_state, &_frame_block);
56263     memset(&ex, 0, sizeof(ex));
56264     memset(&cwork2, 0, sizeof(cwork2));
56265     memset(&cwork3, 0, sizeof(cwork3));
56266     memset(&cwork4, 0, sizeof(cwork4));
56267     memset(&isave, 0, sizeof(isave));
56268     memset(&rsave, 0, sizeof(rsave));
56269     *rc = 0;
56270     ae_vector_init(&ex, 0, DT_COMPLEX, _state, ae_true);
56271     ae_vector_init(&cwork2, 0, DT_COMPLEX, _state, ae_true);
56272     ae_vector_init(&cwork3, 0, DT_COMPLEX, _state, ae_true);
56273     ae_vector_init(&cwork4, 0, DT_COMPLEX, _state, ae_true);
56274     ae_vector_init(&isave, 0, DT_INT, _state, ae_true);
56275     ae_vector_init(&rsave, 0, DT_REAL, _state, ae_true);
56276 
56277     if( n<=0 )
56278     {
56279         ae_frame_leave(_state);
56280         return;
56281     }
56282     ae_vector_set_length(&cwork2, n+1, _state);
56283     *rc = (double)(0);
56284     if( n==0 )
56285     {
56286         *rc = (double)(1);
56287         ae_frame_leave(_state);
56288         return;
56289     }
56290 
56291     /*
56292      * prepare parameters for triangular solver
56293      */
56294     maxgrowth = 1/rcondthreshold(_state);
56295     su = (double)(0);
56296     sl = (double)(1);
56297     for(i=0; i<=n-1; i++)
56298     {
56299         for(j=0; j<=i-1; j++)
56300         {
56301             sl = ae_maxreal(sl, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state);
56302         }
56303         for(j=i; j<=n-1; j++)
56304         {
56305             su = ae_maxreal(su, ae_c_abs(lua->ptr.pp_complex[i][j], _state), _state);
56306         }
56307     }
56308     if( ae_fp_eq(su,(double)(0)) )
56309     {
56310         su = (double)(1);
56311     }
56312     su = 1/su;
56313     sl = 1/sl;
56314 
56315     /*
56316      * Estimate the norm of SU*SL*A.
56317      */
56318     if( !isanormprovided )
56319     {
56320         anorm = (double)(0);
56321         if( onenorm )
56322         {
56323             kase1 = 1;
56324         }
56325         else
56326         {
56327             kase1 = 2;
56328         }
56329         kase = 0;
56330         do
56331         {
56332             rcond_cmatrixestimatenorm(n, &cwork4, &ex, &anorm, &kase, &isave, &rsave, _state);
56333             if( kase!=0 )
56334             {
56335                 if( kase==kase1 )
56336                 {
56337 
56338                     /*
56339                      * Multiply by U
56340                      */
56341                     for(i=1; i<=n; i++)
56342                     {
56343                         v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][i-1], 1, "N", &ex.ptr.p_complex[i], 1, "N", ae_v_len(i-1,n-1));
56344                         ex.ptr.p_complex[i] = v;
56345                     }
56346 
56347                     /*
56348                      * Multiply by L
56349                      */
56350                     for(i=n; i>=1; i--)
56351                     {
56352                         v = ae_complex_from_i(0);
56353                         if( i>1 )
56354                         {
56355                             v = ae_v_cdotproduct(&lua->ptr.pp_complex[i-1][0], 1, "N", &ex.ptr.p_complex[1], 1, "N", ae_v_len(0,i-2));
56356                         }
56357                         ex.ptr.p_complex[i] = ae_c_add(v,ex.ptr.p_complex[i]);
56358                     }
56359                 }
56360                 else
56361                 {
56362 
56363                     /*
56364                      * Multiply by L'
56365                      */
56366                     for(i=1; i<=n; i++)
56367                     {
56368                         cwork2.ptr.p_complex[i] = ae_complex_from_i(0);
56369                     }
56370                     for(i=1; i<=n; i++)
56371                     {
56372                         v = ex.ptr.p_complex[i];
56373                         if( i>1 )
56374                         {
56375                             ae_v_caddc(&cwork2.ptr.p_complex[1], 1, &lua->ptr.pp_complex[i-1][0], 1, "Conj", ae_v_len(1,i-1), v);
56376                         }
56377                         cwork2.ptr.p_complex[i] = ae_c_add(cwork2.ptr.p_complex[i],v);
56378                     }
56379 
56380                     /*
56381                      * Multiply by U'
56382                      */
56383                     for(i=1; i<=n; i++)
56384                     {
56385                         ex.ptr.p_complex[i] = ae_complex_from_i(0);
56386                     }
56387                     for(i=1; i<=n; i++)
56388                     {
56389                         v = cwork2.ptr.p_complex[i];
56390                         ae_v_caddc(&ex.ptr.p_complex[i], 1, &lua->ptr.pp_complex[i-1][i-1], 1, "Conj", ae_v_len(i,n), v);
56391                     }
56392                 }
56393             }
56394         }
56395         while(kase!=0);
56396     }
56397 
56398     /*
56399      * Scale according to SU/SL
56400      */
56401     anorm = anorm*su*sl;
56402 
56403     /*
56404      * Quick return if possible
56405      */
56406     if( ae_fp_eq(anorm,(double)(0)) )
56407     {
56408         ae_frame_leave(_state);
56409         return;
56410     }
56411 
56412     /*
56413      * Estimate the norm of inv(A).
56414      */
56415     ainvnm = (double)(0);
56416     if( onenorm )
56417     {
56418         kase1 = 1;
56419     }
56420     else
56421     {
56422         kase1 = 2;
56423     }
56424     kase = 0;
56425     for(;;)
56426     {
56427         rcond_cmatrixestimatenorm(n, &cwork4, &ex, &ainvnm, &kase, &isave, &rsave, _state);
56428         if( kase==0 )
56429         {
56430             break;
56431         }
56432 
56433         /*
56434          * From 1-based to 0-based
56435          */
56436         for(i=0; i<=n-1; i++)
56437         {
56438             ex.ptr.p_complex[i] = ex.ptr.p_complex[i+1];
56439         }
56440 
56441         /*
56442          * multiply by inv(A) or inv(A')
56443          */
56444         if( kase==kase1 )
56445         {
56446 
56447             /*
56448              * Multiply by inv(L).
56449              */
56450             if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 0, ae_true, maxgrowth, _state) )
56451             {
56452                 *rc = (double)(0);
56453                 ae_frame_leave(_state);
56454                 return;
56455             }
56456 
56457             /*
56458              * Multiply by inv(U).
56459              */
56460             if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 0, ae_false, maxgrowth, _state) )
56461             {
56462                 *rc = (double)(0);
56463                 ae_frame_leave(_state);
56464                 return;
56465             }
56466         }
56467         else
56468         {
56469 
56470             /*
56471              * Multiply by inv(U').
56472              */
56473             if( !cmatrixscaledtrsafesolve(lua, su, n, &ex, ae_true, 2, ae_false, maxgrowth, _state) )
56474             {
56475                 *rc = (double)(0);
56476                 ae_frame_leave(_state);
56477                 return;
56478             }
56479 
56480             /*
56481              * Multiply by inv(L').
56482              */
56483             if( !cmatrixscaledtrsafesolve(lua, sl, n, &ex, ae_false, 2, ae_true, maxgrowth, _state) )
56484             {
56485                 *rc = (double)(0);
56486                 ae_frame_leave(_state);
56487                 return;
56488             }
56489         }
56490 
56491         /*
56492          * from 0-based to 1-based
56493          */
56494         for(i=n-1; i>=0; i--)
56495         {
56496             ex.ptr.p_complex[i+1] = ex.ptr.p_complex[i];
56497         }
56498     }
56499 
56500     /*
56501      * Compute the estimate of the reciprocal condition number.
56502      */
56503     if( ae_fp_neq(ainvnm,(double)(0)) )
56504     {
56505         *rc = 1/ainvnm;
56506         *rc = *rc/anorm;
56507         if( ae_fp_less(*rc,rcondthreshold(_state)) )
56508         {
56509             *rc = (double)(0);
56510         }
56511     }
56512     ae_frame_leave(_state);
56513 }
56514 
56515 
56516 /*************************************************************************
56517 Internal subroutine for matrix norm estimation
56518 
56519   -- LAPACK auxiliary routine (version 3.0) --
56520      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
56521      Courant Institute, Argonne National Lab, and Rice University
56522      February 29, 1992
56523 *************************************************************************/
rcond_rmatrixestimatenorm(ae_int_t n,ae_vector * v,ae_vector * x,ae_vector * isgn,double * est,ae_int_t * kase,ae_state * _state)56524 static void rcond_rmatrixestimatenorm(ae_int_t n,
56525      /* Real    */ ae_vector* v,
56526      /* Real    */ ae_vector* x,
56527      /* Integer */ ae_vector* isgn,
56528      double* est,
56529      ae_int_t* kase,
56530      ae_state *_state)
56531 {
56532     ae_int_t itmax;
56533     ae_int_t i;
56534     double t;
56535     ae_bool flg;
56536     ae_int_t positer;
56537     ae_int_t posj;
56538     ae_int_t posjlast;
56539     ae_int_t posjump;
56540     ae_int_t posaltsgn;
56541     ae_int_t posestold;
56542     ae_int_t postemp;
56543 
56544 
56545     itmax = 5;
56546     posaltsgn = n+1;
56547     posestold = n+2;
56548     postemp = n+3;
56549     positer = n+1;
56550     posj = n+2;
56551     posjlast = n+3;
56552     posjump = n+4;
56553     if( *kase==0 )
56554     {
56555         ae_vector_set_length(v, n+4, _state);
56556         ae_vector_set_length(x, n+1, _state);
56557         ae_vector_set_length(isgn, n+5, _state);
56558         t = (double)1/(double)n;
56559         for(i=1; i<=n; i++)
56560         {
56561             x->ptr.p_double[i] = t;
56562         }
56563         *kase = 1;
56564         isgn->ptr.p_int[posjump] = 1;
56565         return;
56566     }
56567 
56568     /*
56569      *     ................ ENTRY   (JUMP = 1)
56570      *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
56571      */
56572     if( isgn->ptr.p_int[posjump]==1 )
56573     {
56574         if( n==1 )
56575         {
56576             v->ptr.p_double[1] = x->ptr.p_double[1];
56577             *est = ae_fabs(v->ptr.p_double[1], _state);
56578             *kase = 0;
56579             return;
56580         }
56581         *est = (double)(0);
56582         for(i=1; i<=n; i++)
56583         {
56584             *est = *est+ae_fabs(x->ptr.p_double[i], _state);
56585         }
56586         for(i=1; i<=n; i++)
56587         {
56588             if( ae_fp_greater_eq(x->ptr.p_double[i],(double)(0)) )
56589             {
56590                 x->ptr.p_double[i] = (double)(1);
56591             }
56592             else
56593             {
56594                 x->ptr.p_double[i] = (double)(-1);
56595             }
56596             isgn->ptr.p_int[i] = ae_sign(x->ptr.p_double[i], _state);
56597         }
56598         *kase = 2;
56599         isgn->ptr.p_int[posjump] = 2;
56600         return;
56601     }
56602 
56603     /*
56604      *     ................ ENTRY   (JUMP = 2)
     *     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
56606      */
56607     if( isgn->ptr.p_int[posjump]==2 )
56608     {
56609         isgn->ptr.p_int[posj] = 1;
56610         for(i=2; i<=n; i++)
56611         {
56612             if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) )
56613             {
56614                 isgn->ptr.p_int[posj] = i;
56615             }
56616         }
56617         isgn->ptr.p_int[positer] = 2;
56618 
56619         /*
56620          * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
56621          */
56622         for(i=1; i<=n; i++)
56623         {
56624             x->ptr.p_double[i] = (double)(0);
56625         }
56626         x->ptr.p_double[isgn->ptr.p_int[posj]] = (double)(1);
56627         *kase = 1;
56628         isgn->ptr.p_int[posjump] = 3;
56629         return;
56630     }
56631 
56632     /*
56633      *     ................ ENTRY   (JUMP = 3)
56634      *     X HAS BEEN OVERWRITTEN BY A*X.
56635      */
56636     if( isgn->ptr.p_int[posjump]==3 )
56637     {
56638         ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n));
56639         v->ptr.p_double[posestold] = *est;
56640         *est = (double)(0);
56641         for(i=1; i<=n; i++)
56642         {
56643             *est = *est+ae_fabs(v->ptr.p_double[i], _state);
56644         }
56645         flg = ae_false;
56646         for(i=1; i<=n; i++)
56647         {
56648             if( (ae_fp_greater_eq(x->ptr.p_double[i],(double)(0))&&isgn->ptr.p_int[i]<0)||(ae_fp_less(x->ptr.p_double[i],(double)(0))&&isgn->ptr.p_int[i]>=0) )
56649             {
56650                 flg = ae_true;
56651             }
56652         }
56653 
56654         /*
56655          * REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
56656          * OR MAY BE CYCLING.
56657          */
56658         if( !flg||ae_fp_less_eq(*est,v->ptr.p_double[posestold]) )
56659         {
56660             v->ptr.p_double[posaltsgn] = (double)(1);
56661             for(i=1; i<=n; i++)
56662             {
56663                 x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1));
56664                 v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn];
56665             }
56666             *kase = 1;
56667             isgn->ptr.p_int[posjump] = 5;
56668             return;
56669         }
56670         for(i=1; i<=n; i++)
56671         {
56672             if( ae_fp_greater_eq(x->ptr.p_double[i],(double)(0)) )
56673             {
56674                 x->ptr.p_double[i] = (double)(1);
56675                 isgn->ptr.p_int[i] = 1;
56676             }
56677             else
56678             {
56679                 x->ptr.p_double[i] = (double)(-1);
56680                 isgn->ptr.p_int[i] = -1;
56681             }
56682         }
56683         *kase = 2;
56684         isgn->ptr.p_int[posjump] = 4;
56685         return;
56686     }
56687 
56688     /*
56689      *     ................ ENTRY   (JUMP = 4)
     *     X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
56691      */
56692     if( isgn->ptr.p_int[posjump]==4 )
56693     {
56694         isgn->ptr.p_int[posjlast] = isgn->ptr.p_int[posj];
56695         isgn->ptr.p_int[posj] = 1;
56696         for(i=2; i<=n; i++)
56697         {
56698             if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state)) )
56699             {
56700                 isgn->ptr.p_int[posj] = i;
56701             }
56702         }
56703         if( ae_fp_neq(x->ptr.p_double[isgn->ptr.p_int[posjlast]],ae_fabs(x->ptr.p_double[isgn->ptr.p_int[posj]], _state))&&isgn->ptr.p_int[positer]<itmax )
56704         {
56705             isgn->ptr.p_int[positer] = isgn->ptr.p_int[positer]+1;
56706             for(i=1; i<=n; i++)
56707             {
56708                 x->ptr.p_double[i] = (double)(0);
56709             }
56710             x->ptr.p_double[isgn->ptr.p_int[posj]] = (double)(1);
56711             *kase = 1;
56712             isgn->ptr.p_int[posjump] = 3;
56713             return;
56714         }
56715 
56716         /*
56717          * ITERATION COMPLETE.  FINAL STAGE.
56718          */
56719         v->ptr.p_double[posaltsgn] = (double)(1);
56720         for(i=1; i<=n; i++)
56721         {
56722             x->ptr.p_double[i] = v->ptr.p_double[posaltsgn]*(1+(double)(i-1)/(double)(n-1));
56723             v->ptr.p_double[posaltsgn] = -v->ptr.p_double[posaltsgn];
56724         }
56725         *kase = 1;
56726         isgn->ptr.p_int[posjump] = 5;
56727         return;
56728     }
56729 
56730     /*
56731      *     ................ ENTRY   (JUMP = 5)
56732      *     X HAS BEEN OVERWRITTEN BY A*X.
56733      */
56734     if( isgn->ptr.p_int[posjump]==5 )
56735     {
56736         v->ptr.p_double[postemp] = (double)(0);
56737         for(i=1; i<=n; i++)
56738         {
56739             v->ptr.p_double[postemp] = v->ptr.p_double[postemp]+ae_fabs(x->ptr.p_double[i], _state);
56740         }
56741         v->ptr.p_double[postemp] = 2*v->ptr.p_double[postemp]/(3*n);
56742         if( ae_fp_greater(v->ptr.p_double[postemp],*est) )
56743         {
56744             ae_v_move(&v->ptr.p_double[1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,n));
56745             *est = v->ptr.p_double[postemp];
56746         }
56747         *kase = 0;
56748         return;
56749     }
56750 }
56751 
56752 
/*************************************************************************
Complex 1-norm estimator driven by reverse communication (the control
flow mirrors LAPACK-style CLACON estimators — NOTE(review): confirm
against the original LAPACK source if exact provenance matters).

Protocol:
* caller sets *Kase=0 and calls; the routine allocates V, X (1-based,
  length N+1) and the state arrays, fills X with 1/N, and returns with
  *Kase=1
* when the routine returns with *Kase=1, the caller must overwrite X by
  A*X and call again; with *Kase=2, the caller must overwrite X by
  CTRANS(A)*X and call again (these meanings are stated by the per-jump
  comments below)
* iteration ends when the routine returns with *Kase=0; *Est then holds
  the resulting 1-norm estimate

All locals that must survive between calls (I, Iter, J, JLast, Jump,
AbsXi, AltSgn, EstOld, Temp) are packed into ISave/RSave by the
companion save/load helpers on every exit/entry, which makes the
routine restartable across reverse-communication round trips.
*************************************************************************/
static void rcond_cmatrixestimatenorm(ae_int_t n,
     /* Complex */ ae_vector* v,
     /* Complex */ ae_vector* x,
     double* est,
     ae_int_t* kase,
     /* Integer */ ae_vector* isave,
     /* Real    */ ae_vector* rsave,
     ae_state *_state)
{
    ae_int_t itmax;
    ae_int_t i;
    ae_int_t iter;
    ae_int_t j;
    ae_int_t jlast;
    ae_int_t jump;
    double absxi;
    double altsgn;
    double estold;
    double safmin;
    double temp;



    /*
     * Executable statements.
     * ItMax bounds the number of power-method-like refinement sweeps;
     * SafMin guards the normalization divisions below.
     */
    itmax = 5;
    safmin = ae_minrealnumber;
    if( *kase==0 )
    {

        /*
         * Initialization call: allocate work areas (1-based storage,
         * hence N+1 elements) and start with the uniform vector X=e/N.
         */
        ae_vector_set_length(v, n+1, _state);
        ae_vector_set_length(x, n+1, _state);
        ae_vector_set_length(isave, 5, _state);
        ae_vector_set_length(rsave, 4, _state);
        for(i=1; i<=n; i++)
        {
            x->ptr.p_complex[i] = ae_complex_from_d((double)1/(double)n);
        }
        *kase = 1;
        jump = 1;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * Subsequent call: restore locals saved on the previous exit.
     */
    rcond_internalcomplexrcondloadall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);

    /*
     * ENTRY   (JUMP = 1)
     * FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
     */
    if( jump==1 )
    {
        if( n==1 )
        {

            /*
             * 1x1 case: |A*x| with x=1 is the exact norm, finish at once.
             */
            v->ptr.p_complex[1] = x->ptr.p_complex[1];
            *est = ae_c_abs(v->ptr.p_complex[1], _state);
            *kase = 0;
            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
            return;
        }
        *est = rcond_internalcomplexrcondscsum1(x, n, _state);

        /*
         * Normalize each component to unit modulus (X[i]/|X[i]|);
         * components smaller than SafMin are replaced by 1 to avoid
         * dividing by a (near-)zero magnitude.
         */
        for(i=1; i<=n; i++)
        {
            absxi = ae_c_abs(x->ptr.p_complex[i], _state);
            if( ae_fp_greater(absxi,safmin) )
            {
                x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi);
            }
            else
            {
                x->ptr.p_complex[i] = ae_complex_from_i(1);
            }
        }
        *kase = 2;
        jump = 2;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 2)
     * FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
     */
    if( jump==2 )
    {

        /*
         * Pick the index of the largest-magnitude component and probe
         * with the corresponding unit basis vector.
         */
        j = rcond_internalcomplexrcondicmax1(x, n, _state);
        iter = 2;

        /*
         * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
         */
        for(i=1; i<=n; i++)
        {
            x->ptr.p_complex[i] = ae_complex_from_i(0);
        }
        x->ptr.p_complex[j] = ae_complex_from_i(1);
        *kase = 1;
        jump = 3;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 3)
     * X HAS BEEN OVERWRITTEN BY A*X.
     */
    if( jump==3 )
    {
        ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n));
        estold = *est;
        *est = rcond_internalcomplexrcondscsum1(v, n, _state);

        /*
         * TEST FOR CYCLING.
         */
        if( ae_fp_less_eq(*est,estold) )
        {

            /*
             * ITERATION COMPLETE.  FINAL STAGE.
             * Switch to the alternating-sign ramp test vector
             * (+-(1+(i-1)/(n-1))) used for the last refinement pass.
             */
            altsgn = (double)(1);
            for(i=1; i<=n; i++)
            {
                x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1)));
                altsgn = -altsgn;
            }
            *kase = 1;
            jump = 5;
            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
            return;
        }

        /*
         * Estimate improved: renormalize X to unit-modulus components
         * (same SafMin guard as in the first iteration) and go for
         * another CTRANS(A)*X round trip.
         */
        for(i=1; i<=n; i++)
        {
            absxi = ae_c_abs(x->ptr.p_complex[i], _state);
            if( ae_fp_greater(absxi,safmin) )
            {
                x->ptr.p_complex[i] = ae_c_div_d(x->ptr.p_complex[i],absxi);
            }
            else
            {
                x->ptr.p_complex[i] = ae_complex_from_i(1);
            }
        }
        *kase = 2;
        jump = 4;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 4)
     * X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
     */
    if( jump==4 )
    {
        jlast = j;
        j = rcond_internalcomplexrcondicmax1(x, n, _state);

        /*
         * Continue iterating only if the dominant component moved to a
         * new position (magnitudes differ) and the sweep budget is not
         * exhausted.
         */
        if( ae_fp_neq(ae_c_abs(x->ptr.p_complex[jlast], _state),ae_c_abs(x->ptr.p_complex[j], _state))&&iter<itmax )
        {
            iter = iter+1;

            /*
             * MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
             */
            for(i=1; i<=n; i++)
            {
                x->ptr.p_complex[i] = ae_complex_from_i(0);
            }
            x->ptr.p_complex[j] = ae_complex_from_i(1);
            *kase = 1;
            jump = 3;
            rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
            return;
        }

        /*
         * ITERATION COMPLETE.  FINAL STAGE.
         * Same alternating-sign ramp vector as in the JUMP=3 exit.
         */
        altsgn = (double)(1);
        for(i=1; i<=n; i++)
        {
            x->ptr.p_complex[i] = ae_complex_from_d(altsgn*(1+(double)(i-1)/(double)(n-1)));
            altsgn = -altsgn;
        }
        *kase = 1;
        jump = 5;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }

    /*
     * ENTRY   (JUMP = 5)
     * X HAS BEEN OVERWRITTEN BY A*X.
     */
    if( jump==5 )
    {

        /*
         * Scaled 1-norm of A applied to the ramp vector; keep it only
         * if it beats the estimate obtained by the main loop.
         */
        temp = 2*(rcond_internalcomplexrcondscsum1(x, n, _state)/(3*n));
        if( ae_fp_greater(temp,*est) )
        {
            ae_v_cmove(&v->ptr.p_complex[1], 1, &x->ptr.p_complex[1], 1, "N", ae_v_len(1,n));
            *est = temp;
        }
        *kase = 0;
        rcond_internalcomplexrcondsaveall(isave, rsave, &i, &iter, &j, &jlast, &jump, &absxi, &altsgn, &estold, &temp, _state);
        return;
    }
}
56960 
56961 
rcond_internalcomplexrcondscsum1(ae_vector * x,ae_int_t n,ae_state * _state)56962 static double rcond_internalcomplexrcondscsum1(/* Complex */ ae_vector* x,
56963      ae_int_t n,
56964      ae_state *_state)
56965 {
56966     ae_int_t i;
56967     double result;
56968 
56969 
56970     result = (double)(0);
56971     for(i=1; i<=n; i++)
56972     {
56973         result = result+ae_c_abs(x->ptr.p_complex[i], _state);
56974     }
56975     return result;
56976 }
56977 
56978 
rcond_internalcomplexrcondicmax1(ae_vector * x,ae_int_t n,ae_state * _state)56979 static ae_int_t rcond_internalcomplexrcondicmax1(/* Complex */ ae_vector* x,
56980      ae_int_t n,
56981      ae_state *_state)
56982 {
56983     ae_int_t i;
56984     double m;
56985     ae_int_t result;
56986 
56987 
56988     result = 1;
56989     m = ae_c_abs(x->ptr.p_complex[1], _state);
56990     for(i=2; i<=n; i++)
56991     {
56992         if( ae_fp_greater(ae_c_abs(x->ptr.p_complex[i], _state),m) )
56993         {
56994             result = i;
56995             m = ae_c_abs(x->ptr.p_complex[i], _state);
56996         }
56997     }
56998     return result;
56999 }
57000 
57001 
rcond_internalcomplexrcondsaveall(ae_vector * isave,ae_vector * rsave,ae_int_t * i,ae_int_t * iter,ae_int_t * j,ae_int_t * jlast,ae_int_t * jump,double * absxi,double * altsgn,double * estold,double * temp,ae_state * _state)57002 static void rcond_internalcomplexrcondsaveall(/* Integer */ ae_vector* isave,
57003      /* Real    */ ae_vector* rsave,
57004      ae_int_t* i,
57005      ae_int_t* iter,
57006      ae_int_t* j,
57007      ae_int_t* jlast,
57008      ae_int_t* jump,
57009      double* absxi,
57010      double* altsgn,
57011      double* estold,
57012      double* temp,
57013      ae_state *_state)
57014 {
57015 
57016 
57017     isave->ptr.p_int[0] = *i;
57018     isave->ptr.p_int[1] = *iter;
57019     isave->ptr.p_int[2] = *j;
57020     isave->ptr.p_int[3] = *jlast;
57021     isave->ptr.p_int[4] = *jump;
57022     rsave->ptr.p_double[0] = *absxi;
57023     rsave->ptr.p_double[1] = *altsgn;
57024     rsave->ptr.p_double[2] = *estold;
57025     rsave->ptr.p_double[3] = *temp;
57026 }
57027 
57028 
rcond_internalcomplexrcondloadall(ae_vector * isave,ae_vector * rsave,ae_int_t * i,ae_int_t * iter,ae_int_t * j,ae_int_t * jlast,ae_int_t * jump,double * absxi,double * altsgn,double * estold,double * temp,ae_state * _state)57029 static void rcond_internalcomplexrcondloadall(/* Integer */ ae_vector* isave,
57030      /* Real    */ ae_vector* rsave,
57031      ae_int_t* i,
57032      ae_int_t* iter,
57033      ae_int_t* j,
57034      ae_int_t* jlast,
57035      ae_int_t* jump,
57036      double* absxi,
57037      double* altsgn,
57038      double* estold,
57039      double* temp,
57040      ae_state *_state)
57041 {
57042 
57043 
57044     *i = isave->ptr.p_int[0];
57045     *iter = isave->ptr.p_int[1];
57046     *j = isave->ptr.p_int[2];
57047     *jlast = isave->ptr.p_int[3];
57048     *jump = isave->ptr.p_int[4];
57049     *absxi = rsave->ptr.p_double[0];
57050     *altsgn = rsave->ptr.p_double[1];
57051     *estold = rsave->ptr.p_double[2];
57052     *temp = rsave->ptr.p_double[3];
57053 }
57054 
57055 
57056 #endif
57057 #if defined(AE_COMPILE_FBLS) || !defined(AE_PARTIAL_BUILD)
57058 
57059 
57060 /*************************************************************************
57061 Basic Cholesky solver for ScaleA*Cholesky(A)'*x = y.
57062 
57063 This subroutine assumes that:
57064 * A*ScaleA is well scaled
57065 * A is well-conditioned, so no zero divisions or overflow may occur
57066 
57067 INPUT PARAMETERS:
57068     CHA     -   Cholesky decomposition of A
57069     SqrtScaleA- square root of scale factor ScaleA
57070     N       -   matrix size, N>=0.
57071     IsUpper -   storage type
57072     XB      -   right part
57073     Tmp     -   buffer; function automatically allocates it, if it is  too
57074                 small.  It  can  be  reused  if function is called several
57075                 times.
57076 
57077 OUTPUT PARAMETERS:
57078     XB      -   solution
57079 
57080 NOTE 1: no assertion or tests are done during algorithm operation
57081 NOTE 2: N=0 will force algorithm to silently return
57082 
57083   -- ALGLIB --
57084      Copyright 13.10.2010 by Bochkanov Sergey
57085 *************************************************************************/
fblscholeskysolve(ae_matrix * cha,double sqrtscalea,ae_int_t n,ae_bool isupper,ae_vector * xb,ae_vector * tmp,ae_state * _state)57086 void fblscholeskysolve(/* Real    */ ae_matrix* cha,
57087      double sqrtscalea,
57088      ae_int_t n,
57089      ae_bool isupper,
57090      /* Real    */ ae_vector* xb,
57091      /* Real    */ ae_vector* tmp,
57092      ae_state *_state)
57093 {
57094     double v;
57095 
57096 
57097     if( n<=0 )
57098     {
57099         return;
57100     }
57101     if( tmp->cnt<n )
57102     {
57103         ae_vector_set_length(tmp, n, _state);
57104     }
57105 
57106     /*
57107      * Scale right part
57108      */
57109     v = 1/ae_sqr(sqrtscalea, _state);
57110     ae_v_muld(&xb->ptr.p_double[0], 1, ae_v_len(0,n-1), v);
57111 
57112     /*
57113      * Solve A = L*L' or A=U'*U
57114      */
57115     if( isupper )
57116     {
57117 
57118         /*
57119          * Solve U'*y=b first.
57120          */
57121         rmatrixtrsv(n, cha, 0, 0, ae_true, ae_false, 1, xb, 0, _state);
57122 
57123         /*
57124          * Solve U*x=y then.
57125          */
57126         rmatrixtrsv(n, cha, 0, 0, ae_true, ae_false, 0, xb, 0, _state);
57127     }
57128     else
57129     {
57130 
57131         /*
57132          * Solve L*y=b first
57133          */
57134         rmatrixtrsv(n, cha, 0, 0, ae_false, ae_false, 0, xb, 0, _state);
57135 
57136         /*
57137          * Solve L'*x=y then.
57138          */
57139         rmatrixtrsv(n, cha, 0, 0, ae_false, ae_false, 1, xb, 0, _state);
57140     }
57141 }
57142 
57143 
57144 /*************************************************************************
57145 Fast basic linear solver: linear SPD CG
57146 
57147 Solves (A^T*A + alpha*I)*x = b where:
57148 * A is MxN matrix
57149 * alpha>0 is a scalar
57150 * I is NxN identity matrix
57151 * b is Nx1 vector
57152 * X is Nx1 unknown vector.
57153 
57154 N iterations of linear conjugate gradient are used to solve problem.
57155 
57156 INPUT PARAMETERS:
57157     A   -   array[M,N], matrix
57158     M   -   number of rows
57159     N   -   number of unknowns
57160     B   -   array[N], right part
    X   -   initial approximation, array[N]
57162     Buf -   buffer; function automatically allocates it, if it is too
57163             small. It can be reused if function is called several times
57164             with same M and N.
57165 
57166 OUTPUT PARAMETERS:
57167     X   -   improved solution
57168 
57169 NOTES:
57170 *   solver checks quality of improved solution. If (because of problem
57171     condition number, numerical noise, etc.) new solution is WORSE than
57172     original approximation, then original approximation is returned.
57173 *   solver assumes that both A, B, Alpha are well scaled (i.e. they are
57174     less than sqrt(overflow) and greater than sqrt(underflow)).
57175 
57176   -- ALGLIB --
57177      Copyright 20.08.2009 by Bochkanov Sergey
57178 *************************************************************************/
void fblssolvecgx(/* Real    */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     double alpha,
     /* Real    */ ae_vector* b,
     /* Real    */ ae_vector* x,
     /* Real    */ ae_vector* buf,
     ae_state *_state)
{
    ae_int_t k;
    ae_int_t offsrk;      /* offset of r(k)   in Buf, length N */
    ae_int_t offsrk1;     /* offset of r(k+1) in Buf, length N */
    ae_int_t offsxk;      /* offset of x(k)   in Buf, length N */
    ae_int_t offsxk1;     /* offset of x(k+1) in Buf, length N */
    ae_int_t offspk;      /* offset of p(k)   in Buf, length N */
    ae_int_t offspk1;     /* offset of p(k+1) in Buf, length N */
    ae_int_t offstmp1;    /* offset of Tmp1=A*v      in Buf, length M */
    ae_int_t offstmp2;    /* offset of Tmp2=A'*Tmp1  in Buf, length N */
    ae_int_t bs;          /* total buffer size required */
    double e1;            /* residual norm of the initial approximation */
    double e2;            /* residual norm of the CG result */
    double rk2;           /* r(k)'*r(k) */
    double rk12;          /* r(k+1)'*r(k+1) */
    double pap;           /* p(k)'*(A'*A+alpha*I)*p(k) */
    double s;             /* CG step length */
    double betak;         /* CG direction update coefficient */
    double v1;
    double v2;



    /*
     * Test for special case: B=0
     * (zero right part => zero solution, returned immediately)
     */
    v1 = ae_v_dotproduct(&b->ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1));
    if( ae_fp_eq(v1,(double)(0)) )
    {
        for(k=0; k<=n-1; k++)
        {
            x->ptr.p_double[k] = (double)(0);
        }
        return;
    }

    /*
     * Offsets inside Buf for:
     * * R[K], R[K+1]
     * * X[K], X[K+1]
     * * P[K], P[K+1]
     * * Tmp1 - array[M], Tmp2 - array[N]
     * All working vectors live in one flat buffer which is grown only
     * when too small, so repeated calls with same M/N do not allocate.
     */
    offsrk = 0;
    offsrk1 = offsrk+n;
    offsxk = offsrk1+n;
    offsxk1 = offsxk+n;
    offspk = offsxk1+n;
    offspk1 = offspk+n;
    offstmp1 = offspk1+n;
    offstmp2 = offstmp1+m;
    bs = offstmp2+n;
    if( buf->cnt<bs )
    {
        ae_vector_set_length(buf, bs, _state);
    }

    /*
     * x(0) = x
     */
    ae_v_move(&buf->ptr.p_double[offsxk], 1, &x->ptr.p_double[0], 1, ae_v_len(offsxk,offsxk+n-1));

    /*
     * r(0) = b-A*x(0)
     * RK2 = r(0)'*r(0)
     * Here "A" of the CG system is (A'*A+alpha*I): the two rmatrixmv()
     * calls compute Tmp1=A*x (opcode 0, MxN) then Tmp2=A'*Tmp1
     * (opcode 1, NxM — dimensions imply transposed product), and the
     * alpha*x term is folded in with ae_v_addd().
     */
    rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state);
    rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
    ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
    ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1));
    ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1));
    rk2 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1));
    ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offspk,offspk+n-1));
    e1 = ae_sqrt(rk2, _state);

    /*
     * Cycle: at most N CG iterations (exact-arithmetic termination bound)
     */
    for(k=0; k<=n-1; k++)
    {

        /*
         * Calculate A*p(k) - store in Buf[OffsTmp2:OffsTmp2+N-1]
         * and p(k)'*A*p(k)  - store in PAP
         *
         * If PAP=0, break (iteration is over)
         * PAP is computed as |A*p|^2 + alpha*|p|^2, which equals
         * p'*(A'*A+alpha*I)*p without forming the product explicitly.
         */
        rmatrixmv(m, n, a, 0, 0, 0, buf, offspk, buf, offstmp1, _state);
        v1 = ae_v_dotproduct(&buf->ptr.p_double[offstmp1], 1, &buf->ptr.p_double[offstmp1], 1, ae_v_len(offstmp1,offstmp1+m-1));
        v2 = ae_v_dotproduct(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk,offspk+n-1));
        pap = v1+alpha*v2;
        rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
        ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
        if( ae_fp_eq(pap,(double)(0)) )
        {
            break;
        }

        /*
         * S = (r(k)'*r(k))/(p(k)'*A*p(k))
         */
        s = rk2/pap;

        /*
         * x(k+1) = x(k) + S*p(k)
         */
        ae_v_move(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offsxk1,offsxk1+n-1));
        ae_v_addd(&buf->ptr.p_double[offsxk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offsxk1,offsxk1+n-1), s);

        /*
         * r(k+1) = r(k) - S*A*p(k)
         * RK12 = r(k+1)'*r(k+1)
         *
         * Break if r(k+1) small enough (when compared to r(k))
         */
        ae_v_move(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk1,offsrk1+n-1));
        ae_v_subd(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk1,offsrk1+n-1), s);
        rk12 = ae_v_dotproduct(&buf->ptr.p_double[offsrk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk1,offsrk1+n-1));
        if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*ae_sqrt(rk2, _state)) )
        {

            /*
             * X(k) = x(k+1) before exit -
             * - because we expect to find solution at x(k)
             */
            ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1));
            break;
        }

        /*
         * BetaK = RK12/RK2
         * p(k+1) = r(k+1)+betak*p(k)
         */
        betak = rk12/rk2;
        ae_v_move(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offspk1,offspk1+n-1));
        ae_v_addd(&buf->ptr.p_double[offspk1], 1, &buf->ptr.p_double[offspk], 1, ae_v_len(offspk1,offspk1+n-1), betak);

        /*
         * r(k) := r(k+1)
         * x(k) := x(k+1)
         * p(k) := p(k+1)
         */
        ae_v_move(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk1], 1, ae_v_len(offsrk,offsrk+n-1));
        ae_v_move(&buf->ptr.p_double[offsxk], 1, &buf->ptr.p_double[offsxk1], 1, ae_v_len(offsxk,offsxk+n-1));
        ae_v_move(&buf->ptr.p_double[offspk], 1, &buf->ptr.p_double[offspk1], 1, ae_v_len(offspk,offspk+n-1));
        rk2 = rk12;
    }

    /*
     * Calculate E2: residual norm of the candidate solution x(k),
     * computed the same way as E1 (recompute b-(A'*A+alpha*I)*x).
     */
    rmatrixmv(m, n, a, 0, 0, 0, buf, offsxk, buf, offstmp1, _state);
    rmatrixmv(n, m, a, 0, 0, 1, buf, offstmp1, buf, offstmp2, _state);
    ae_v_addd(&buf->ptr.p_double[offstmp2], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(offstmp2,offstmp2+n-1), alpha);
    ae_v_move(&buf->ptr.p_double[offsrk], 1, &b->ptr.p_double[0], 1, ae_v_len(offsrk,offsrk+n-1));
    ae_v_sub(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offstmp2], 1, ae_v_len(offsrk,offsrk+n-1));
    v1 = ae_v_dotproduct(&buf->ptr.p_double[offsrk], 1, &buf->ptr.p_double[offsrk], 1, ae_v_len(offsrk,offsrk+n-1));
    e2 = ae_sqrt(v1, _state);

    /*
     * Output result (if it was improved)
     * Otherwise X keeps the caller's original approximation.
     */
    if( ae_fp_less(e2,e1) )
    {
        ae_v_move(&x->ptr.p_double[0], 1, &buf->ptr.p_double[offsxk], 1, ae_v_len(0,n-1));
    }
}
57354 
57355 
57356 /*************************************************************************
57357 Construction of linear conjugate gradient solver.
57358 
57359 State parameter passed using "shared" semantics (i.e. previous state is NOT
erased). When it is already initialized, we can reuse previously allocated
57361 memory.
57362 
57363 INPUT PARAMETERS:
57364     X       -   initial solution
57365     B       -   right part
57366     N       -   system size
57367     State   -   structure; may be preallocated, if we want to reuse memory
57368 
57369 OUTPUT PARAMETERS:
57370     State   -   structure which is used by FBLSCGIteration() to store
57371                 algorithm state between subsequent calls.
57372 
57373 NOTE: no error checking is done; caller must check all parameters, prevent
57374       overflows, and so on.
57375 
57376   -- ALGLIB --
57377      Copyright 22.10.2009 by Bochkanov Sergey
57378 *************************************************************************/
fblscgcreate(ae_vector * x,ae_vector * b,ae_int_t n,fblslincgstate * state,ae_state * _state)57379 void fblscgcreate(/* Real    */ ae_vector* x,
57380      /* Real    */ ae_vector* b,
57381      ae_int_t n,
57382      fblslincgstate* state,
57383      ae_state *_state)
57384 {
57385 
57386 
57387     if( state->b.cnt<n )
57388     {
57389         ae_vector_set_length(&state->b, n, _state);
57390     }
57391     if( state->rk.cnt<n )
57392     {
57393         ae_vector_set_length(&state->rk, n, _state);
57394     }
57395     if( state->rk1.cnt<n )
57396     {
57397         ae_vector_set_length(&state->rk1, n, _state);
57398     }
57399     if( state->xk.cnt<n )
57400     {
57401         ae_vector_set_length(&state->xk, n, _state);
57402     }
57403     if( state->xk1.cnt<n )
57404     {
57405         ae_vector_set_length(&state->xk1, n, _state);
57406     }
57407     if( state->pk.cnt<n )
57408     {
57409         ae_vector_set_length(&state->pk, n, _state);
57410     }
57411     if( state->pk1.cnt<n )
57412     {
57413         ae_vector_set_length(&state->pk1, n, _state);
57414     }
57415     if( state->tmp2.cnt<n )
57416     {
57417         ae_vector_set_length(&state->tmp2, n, _state);
57418     }
57419     if( state->x.cnt<n )
57420     {
57421         ae_vector_set_length(&state->x, n, _state);
57422     }
57423     if( state->ax.cnt<n )
57424     {
57425         ae_vector_set_length(&state->ax, n, _state);
57426     }
57427     state->n = n;
57428     ae_v_move(&state->xk.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
57429     ae_v_move(&state->b.ptr.p_double[0], 1, &b->ptr.p_double[0], 1, ae_v_len(0,n-1));
57430     ae_vector_set_length(&state->rstate.ia, 1+1, _state);
57431     ae_vector_set_length(&state->rstate.ra, 6+1, _state);
57432     state->rstate.stage = -1;
57433 }
57434 
57435 
57436 /*************************************************************************
57437 Linear CG solver, function relying on reverse communication to calculate
57438 matrix-vector products.
57439 
57440 See comments for FBLSLinCGState structure for more info.
57441 
57442   -- ALGLIB --
57443      Copyright 22.10.2009 by Bochkanov Sergey
57444 *************************************************************************/
fblscgiteration(fblslincgstate * state,ae_state * _state)57445 ae_bool fblscgiteration(fblslincgstate* state, ae_state *_state)
57446 {
57447     ae_int_t n;
57448     ae_int_t k;
57449     double rk2;
57450     double rk12;
57451     double pap;
57452     double s;
57453     double betak;
57454     double v1;
57455     double v2;
57456     ae_bool result;
57457 
57458 
57459 
57460     /*
57461      * Reverse communication preparations
57462      * I know it looks ugly, but it works the same way
57463      * anywhere from C++ to Python.
57464      *
57465      * This code initializes locals by:
57466      * * random values determined during code
57467      *   generation - on first subroutine call
57468      * * values from previous call - on subsequent calls
57469      */
57470     if( state->rstate.stage>=0 )
57471     {
57472         n = state->rstate.ia.ptr.p_int[0];
57473         k = state->rstate.ia.ptr.p_int[1];
57474         rk2 = state->rstate.ra.ptr.p_double[0];
57475         rk12 = state->rstate.ra.ptr.p_double[1];
57476         pap = state->rstate.ra.ptr.p_double[2];
57477         s = state->rstate.ra.ptr.p_double[3];
57478         betak = state->rstate.ra.ptr.p_double[4];
57479         v1 = state->rstate.ra.ptr.p_double[5];
57480         v2 = state->rstate.ra.ptr.p_double[6];
57481     }
57482     else
57483     {
57484         n = 359;
57485         k = -58;
57486         rk2 = -919;
57487         rk12 = -909;
57488         pap = 81;
57489         s = 255;
57490         betak = 74;
57491         v1 = -788;
57492         v2 = 809;
57493     }
57494     if( state->rstate.stage==0 )
57495     {
57496         goto lbl_0;
57497     }
57498     if( state->rstate.stage==1 )
57499     {
57500         goto lbl_1;
57501     }
57502     if( state->rstate.stage==2 )
57503     {
57504         goto lbl_2;
57505     }
57506 
57507     /*
57508      * Routine body
57509      */
57510 
57511     /*
57512      * prepare locals
57513      */
57514     n = state->n;
57515 
57516     /*
57517      * Test for special case: B=0
57518      */
57519     v1 = ae_v_dotproduct(&state->b.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
57520     if( ae_fp_eq(v1,(double)(0)) )
57521     {
57522         for(k=0; k<=n-1; k++)
57523         {
57524             state->xk.ptr.p_double[k] = (double)(0);
57525         }
57526         result = ae_false;
57527         return result;
57528     }
57529 
57530     /*
57531      * r(0) = b-A*x(0)
57532      * RK2 = r(0)'*r(0)
57533      */
57534     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57535     state->rstate.stage = 0;
57536     goto lbl_rcomm;
57537 lbl_0:
57538     ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
57539     ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
57540     rk2 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57541     ae_v_move(&state->pk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57542     state->e1 = ae_sqrt(rk2, _state);
57543 
57544     /*
57545      * Cycle
57546      */
57547     k = 0;
57548 lbl_3:
57549     if( k>n-1 )
57550     {
57551         goto lbl_5;
57552     }
57553 
57554     /*
57555      * Calculate A*p(k) - store in State.Tmp2
57556      * and p(k)'*A*p(k)  - store in PAP
57557      *
57558      * If PAP=0, break (iteration is over)
57559      */
57560     ae_v_move(&state->x.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57561     state->rstate.stage = 1;
57562     goto lbl_rcomm;
57563 lbl_1:
57564     ae_v_move(&state->tmp2.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
57565     pap = state->xax;
57566     if( !ae_isfinite(pap, _state) )
57567     {
57568         goto lbl_5;
57569     }
57570     if( ae_fp_less_eq(pap,(double)(0)) )
57571     {
57572         goto lbl_5;
57573     }
57574 
57575     /*
57576      * S = (r(k)'*r(k))/(p(k)'*A*p(k))
57577      */
57578     s = rk2/pap;
57579 
57580     /*
57581      * x(k+1) = x(k) + S*p(k)
57582      */
57583     ae_v_move(&state->xk1.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57584     ae_v_addd(&state->xk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), s);
57585 
57586     /*
57587      * r(k+1) = r(k) - S*A*p(k)
57588      * RK12 = r(k+1)'*r(k+1)
57589      *
57590      * Break if r(k+1) small enough (when compared to r(k))
57591      */
57592     ae_v_move(&state->rk1.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57593     ae_v_subd(&state->rk1.ptr.p_double[0], 1, &state->tmp2.ptr.p_double[0], 1, ae_v_len(0,n-1), s);
57594     rk12 = ae_v_dotproduct(&state->rk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57595     if( ae_fp_less_eq(ae_sqrt(rk12, _state),100*ae_machineepsilon*state->e1) )
57596     {
57597 
57598         /*
57599          * X(k) = x(k+1) before exit -
57600          * - because we expect to find solution at x(k)
57601          */
57602         ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57603         goto lbl_5;
57604     }
57605 
57606     /*
57607      * BetaK = RK12/RK2
57608      * p(k+1) = r(k+1)+betak*p(k)
57609      *
57610      * NOTE: we expect that BetaK won't overflow because of
57611      * "Sqrt(RK12)<=100*MachineEpsilon*E1" test above.
57612      */
57613     betak = rk12/rk2;
57614     ae_v_move(&state->pk1.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57615     ae_v_addd(&state->pk1.ptr.p_double[0], 1, &state->pk.ptr.p_double[0], 1, ae_v_len(0,n-1), betak);
57616 
57617     /*
57618      * r(k) := r(k+1)
57619      * x(k) := x(k+1)
57620      * p(k) := p(k+1)
57621      */
57622     ae_v_move(&state->rk.ptr.p_double[0], 1, &state->rk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57623     ae_v_move(&state->xk.ptr.p_double[0], 1, &state->xk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57624     ae_v_move(&state->pk.ptr.p_double[0], 1, &state->pk1.ptr.p_double[0], 1, ae_v_len(0,n-1));
57625     rk2 = rk12;
57626     k = k+1;
57627     goto lbl_3;
57628 lbl_5:
57629 
57630     /*
57631      * Calculate E2
57632      */
57633     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57634     state->rstate.stage = 2;
57635     goto lbl_rcomm;
57636 lbl_2:
57637     ae_v_move(&state->rk.ptr.p_double[0], 1, &state->b.ptr.p_double[0], 1, ae_v_len(0,n-1));
57638     ae_v_sub(&state->rk.ptr.p_double[0], 1, &state->ax.ptr.p_double[0], 1, ae_v_len(0,n-1));
57639     v1 = ae_v_dotproduct(&state->rk.ptr.p_double[0], 1, &state->rk.ptr.p_double[0], 1, ae_v_len(0,n-1));
57640     state->e2 = ae_sqrt(v1, _state);
57641     result = ae_false;
57642     return result;
57643 
57644     /*
57645      * Saving state
57646      */
57647 lbl_rcomm:
57648     result = ae_true;
57649     state->rstate.ia.ptr.p_int[0] = n;
57650     state->rstate.ia.ptr.p_int[1] = k;
57651     state->rstate.ra.ptr.p_double[0] = rk2;
57652     state->rstate.ra.ptr.p_double[1] = rk12;
57653     state->rstate.ra.ptr.p_double[2] = pap;
57654     state->rstate.ra.ptr.p_double[3] = s;
57655     state->rstate.ra.ptr.p_double[4] = betak;
57656     state->rstate.ra.ptr.p_double[5] = v1;
57657     state->rstate.ra.ptr.p_double[6] = v2;
57658     return result;
57659 }
57660 
57661 
57662 /*************************************************************************
57663 Construction of GMRES(k) solver.
57664 
State parameter passed using "shared" semantics (i.e. previous state is NOT
erased). When it is already initialized, we can reuse previously allocated
memory.

After (but not before!) initialization you can tweak following fields (they
are initialized by default values, but you can change it):
* State.EpsOrt - stop if norm of new candidate for orthogonalization is below EpsOrt
* State.EpsRes - stop if residual decreased below EpsRes*|B|
* State.EpsRed - stop if relative reduction of residual |R(k+1)|/|R(k)|>EpsRed
57674 
57675 INPUT PARAMETERS:
57676     B       -   right part
57677     N       -   system size
57678     K       -   iterations count, K>=1
57679     State   -   structure; may be preallocated, if we want to reuse memory
57680 
57681 OUTPUT PARAMETERS:
57682     State   -   structure which is used by FBLSGMRESIteration() to store
57683                 algorithm state between subsequent calls.
57684 
57685 NOTE: no error checking is done; caller must check all parameters, prevent
57686       overflows, and so on.
57687 
57688   -- ALGLIB --
57689      Copyright 18.11.2020 by Bochkanov Sergey
57690 *************************************************************************/
fblsgmrescreate(ae_vector * b,ae_int_t n,ae_int_t k,fblsgmresstate * state,ae_state * _state)57691 void fblsgmrescreate(/* Real    */ ae_vector* b,
57692      ae_int_t n,
57693      ae_int_t k,
57694      fblsgmresstate* state,
57695      ae_state *_state)
57696 {
57697 
57698 
57699     ae_assert((n>0&&k>0)&&k<=n, "FBLSGMRESCreate: incorrect params", _state);
57700     state->n = n;
57701     state->itscnt = k;
57702     state->epsort = (1000+ae_sqrt((double)(n), _state))*ae_machineepsilon;
57703     state->epsres = (1000+ae_sqrt((double)(n), _state))*ae_machineepsilon;
57704     state->epsred = 1.0;
57705     state->epsdiag = (10000+n)*ae_machineepsilon;
57706     state->itsperformed = 0;
57707     state->retcode = 0;
57708     rcopyallocv(n, b, &state->b, _state);
57709     rallocv(n, &state->x, _state);
57710     rallocv(n, &state->ax, _state);
57711     ae_vector_set_length(&state->rstate.ia, 4+1, _state);
57712     ae_vector_set_length(&state->rstate.ra, 10+1, _state);
57713     state->rstate.stage = -1;
57714 }
57715 
57716 
57717 /*************************************************************************
GMRES(k) solver, function relying on reverse communication to calculate
matrix-vector products.

See comments for FBLSGMRESState structure for more info.
57722 
57723   -- ALGLIB --
57724      Copyright 22.10.2009 by Bochkanov Sergey
57725 *************************************************************************/
ae_bool fblsgmresiteration(fblsgmresstate* state, ae_state *_state)
{
    ae_int_t n;
    ae_int_t itidx;
    ae_int_t kdim;
    double rmax;
    double rmindiag;
    double cs;
    double sn;
    double v;
    double vv;
    double anrm;
    double qnrm;
    double bnrm;
    double resnrm;
    double prevresnrm;
    ae_int_t i;
    ae_int_t j;
    ae_bool result;



    /*
     * Reverse communication preparations
     * I know it looks ugly, but it works the same way
     * anywhere from C++ to Python.
     *
     * This code initializes locals by:
     * * random values determined during code
     *   generation - on first subroutine call
     * * values from previous call - on subsequent calls
     */
    if( state->rstate.stage>=0 )
    {
        n = state->rstate.ia.ptr.p_int[0];
        itidx = state->rstate.ia.ptr.p_int[1];
        kdim = state->rstate.ia.ptr.p_int[2];
        i = state->rstate.ia.ptr.p_int[3];
        j = state->rstate.ia.ptr.p_int[4];
        rmax = state->rstate.ra.ptr.p_double[0];
        rmindiag = state->rstate.ra.ptr.p_double[1];
        cs = state->rstate.ra.ptr.p_double[2];
        sn = state->rstate.ra.ptr.p_double[3];
        v = state->rstate.ra.ptr.p_double[4];
        vv = state->rstate.ra.ptr.p_double[5];
        anrm = state->rstate.ra.ptr.p_double[6];
        qnrm = state->rstate.ra.ptr.p_double[7];
        bnrm = state->rstate.ra.ptr.p_double[8];
        resnrm = state->rstate.ra.ptr.p_double[9];
        prevresnrm = state->rstate.ra.ptr.p_double[10];
    }
    else
    {
        n = 205;
        itidx = -838;
        kdim = 939;
        i = -526;
        j = 763;
        rmax = -541;
        rmindiag = -698;
        cs = -900;
        sn = -318;
        v = -940;
        vv = 1016;
        anrm = -229;
        qnrm = -536;
        bnrm = 487;
        resnrm = -115;
        prevresnrm = 886;
    }
    if( state->rstate.stage==0 )
    {
        goto lbl_0;
    }

    /*
     * Routine body
     *
     * NOTE (review): this function returns ae_true every time the caller
     * has to compute State.Ax := A*State.X and call it again; it returns
     * ae_false when iterations are over (solution is stored in State.XS,
     * termination reason in State.RetCode).
     */
    n = state->n;
    state->retcode = 1;   /* RetCode=1: default - iteration budget (ItsCnt) exhausted */

    /*
     * Set up Q0
     */
    rsetallocv(n, 0.0, &state->xs, _state);
    bnrm = ae_sqrt(rdotv2(n, &state->b, _state), _state);
    if( ae_fp_eq(bnrm,(double)(0)) )
    {

        /*
         * Zero right part: XS=0 is an exact solution, no iterations needed
         */
        result = ae_false;
        return result;
    }
    rallocm(state->itscnt+1, n, &state->qi, _state);
    rallocm(state->itscnt, n, &state->aqi, _state);
    rcopymulvr(n, 1/bnrm, &state->b, &state->qi, 0, _state);
    rsetallocm(state->itscnt+1, state->itscnt, 0.0, &state->h, _state);
    rsetallocm(state->itscnt+1, state->itscnt, 0.0, &state->hr, _state);
    rsetallocm(state->itscnt+1, state->itscnt+1, 0.0, &state->hq, _state);
    for(i=0; i<=state->itscnt; i++)
    {
        state->hq.ptr.pp_double[i][i] = (double)(1);
    }
    rsetallocv(state->itscnt+1, 0.0, &state->hqb, _state);
    state->hqb.ptr.p_double[0] = bnrm;

    /*
     * Perform iteration
     */
    resnrm = bnrm;
    kdim = 0;
    rmax = (double)(0);
    rmindiag = 1.0E99;
    rsetallocv(state->itscnt, 0.0, &state->ys, _state);
    rallocv(ae_maxint(n, state->itscnt+2, _state), &state->tmp0, _state);
    rallocv(ae_maxint(n, state->itscnt+2, _state), &state->tmp1, _state);
    itidx = 0;
lbl_1:
    if( itidx>state->itscnt-1 )
    {
        goto lbl_3;
    }
    prevresnrm = resnrm;

    /*
     * Compute A*Qi[ItIdx], then compute Qi[ItIdx+1]
     */
    rcopyrv(n, &state->qi, itidx, &state->x, _state);
    state->rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    rcopyvr(n, &state->ax, &state->aqi, itidx, _state);
    anrm = ae_sqrt(rdotv2(n, &state->ax, _state), _state);
    if( ae_fp_eq(anrm,(double)(0)) )
    {

        /*
         * RetCode=2: A*Qi[ItIdx] is exactly zero
         */
        state->retcode = 2;
        goto lbl_3;
    }

    /*
     * NOTE (review): AX is deliberately orthogonalized against the first
     * ItIdx+1 rows of Qi TWICE in a row (two-pass reorthogonalization for
     * better numerical orthogonality); projection coefficients from BOTH
     * passes are accumulated into column ItIdx of H.
     */
    rowwisegramschmidt(&state->qi, itidx+1, n, &state->ax, &state->tmp0, ae_true, _state);
    rowwisegramschmidt(&state->qi, itidx+1, n, &state->ax, &state->tmp1, ae_true, _state);
    raddvc(itidx+1, 1.0, &state->tmp0, &state->h, itidx, _state);
    raddvc(itidx+1, 1.0, &state->tmp1, &state->h, itidx, _state);
    qnrm = ae_sqrt(rdotv2(n, &state->ax, _state), _state);
    state->h.ptr.pp_double[itidx+1][itidx] = qnrm;
    rmulv(n, 1/coalesce(qnrm, (double)(1), _state), &state->ax, _state);   /* coalesce() guards against division by zero */
    rcopyvr(n, &state->ax, &state->qi, itidx+1, _state);

    /*
     * We have QR decomposition of H from the previous iteration:
     * * (ItIdx+1)*(ItIdx+1) orthogonal HQ embedded into larger (ItIdx+2)*(ItIdx+2) identity matrix
     * * (ItIdx+1)*ItIdx     triangular HR embedded into larger (ItIdx+2)*(ItIdx+1) zero matrix
     *
     * We just have to update QR decomposition after one more column is added to H:
     * * multiply this column by HQ to obtain (ItIdx+2)-dimensional vector X
     * * generate rotation to nullify last element of X to obtain (ItIdx+1)-dimensional vector Y
     *   that is copied into (ItIdx+1)-th column of HR
     * * apply same rotation to HQ
     * * apply same rotation to HQB - current right-hand side
     */
    rcopycv(itidx+2, &state->h, itidx, &state->tmp0, _state);
    rmatrixgemv(itidx+2, itidx+2, 1.0, &state->hq, 0, 0, 0, &state->tmp0, 0, 0.0, &state->tmp1, 0, _state);
    generaterotation(state->tmp1.ptr.p_double[itidx], state->tmp1.ptr.p_double[itidx+1], &cs, &sn, &v, _state);
    state->tmp1.ptr.p_double[itidx] = v;
    state->tmp1.ptr.p_double[itidx+1] = (double)(0);
    rmax = ae_maxreal(rmax, rmaxabsv(itidx+2, &state->tmp1, _state), _state);
    rmindiag = ae_minreal(rmindiag, ae_fabs(v, _state), _state);
    if( ae_fp_less_eq(rmindiag,rmax*state->epsdiag) )
    {

        /*
         * RetCode=3: diagonal element of R became too small relative to the
         * largest element seen so far - triangular solve would be unstable
         */
        state->retcode = 3;
        goto lbl_3;
    }
    rcopyvc(itidx+2, &state->tmp1, &state->hr, itidx, _state);
    for(j=0; j<=itidx+1; j++)
    {
        v = state->hq.ptr.pp_double[itidx+0][j];
        vv = state->hq.ptr.pp_double[itidx+1][j];
        state->hq.ptr.pp_double[itidx+0][j] = cs*v+sn*vv;
        state->hq.ptr.pp_double[itidx+1][j] = -sn*v+cs*vv;
    }
    v = state->hqb.ptr.p_double[itidx+0];
    vv = state->hqb.ptr.p_double[itidx+1];
    state->hqb.ptr.p_double[itidx+0] = cs*v+sn*vv;
    state->hqb.ptr.p_double[itidx+1] = -sn*v+cs*vv;
    resnrm = ae_fabs(state->hqb.ptr.p_double[itidx+1], _state);   /* last transformed RHS entry = current residual norm */

    /*
     * Previous attempt to extend R was successful (no small diagonal elements).
     * Increase Krylov subspace dimensionality.
     */
    kdim = kdim+1;

    /*
     * Iteration is over.
     * Terminate if:
     * * last Qi was nearly zero after orthogonalization.
     * * sufficient decrease of residual
     * * stagnation of residual
     */
    state->itsperformed = state->itsperformed+1;
    if( ae_fp_less_eq(qnrm,state->epsort*anrm)||ae_fp_eq(qnrm,(double)(0)) )
    {

        /*
         * RetCode=4: happy breakdown - new direction vanished after orthogonalization
         */
        state->retcode = 4;
        goto lbl_3;
    }
    if( ae_fp_less_eq(resnrm,state->epsres*bnrm) )
    {

        /*
         * RetCode=5: residual decreased below EpsRes*|B|
         */
        state->retcode = 5;
        goto lbl_3;
    }
    if( ae_fp_greater(resnrm/prevresnrm,state->epsred) )
    {

        /*
         * RetCode=6: insufficient reduction of the residual (stagnation)
         */
        state->retcode = 6;
        goto lbl_3;
    }
    itidx = itidx+1;
    goto lbl_1;
lbl_3:

    /*
     * Post-solve: solve triangular system HR*YS=HQB for the first KDim
     * coefficients, then combine the KDim basis vectors stored in rows
     * of Qi with these coefficients to produce XS
     */
    if( kdim>0 )
    {
        rcopyv(kdim, &state->hqb, &state->ys, _state);
        rmatrixtrsv(kdim, &state->hr, 0, 0, ae_true, ae_false, 0, &state->ys, 0, _state);
        rmatrixmv(n, kdim, &state->qi, 0, 0, 1, &state->ys, 0, &state->xs, 0, _state);
    }
    result = ae_false;
    return result;

    /*
     * Saving state
     */
lbl_rcomm:
    result = ae_true;
    state->rstate.ia.ptr.p_int[0] = n;
    state->rstate.ia.ptr.p_int[1] = itidx;
    state->rstate.ia.ptr.p_int[2] = kdim;
    state->rstate.ia.ptr.p_int[3] = i;
    state->rstate.ia.ptr.p_int[4] = j;
    state->rstate.ra.ptr.p_double[0] = rmax;
    state->rstate.ra.ptr.p_double[1] = rmindiag;
    state->rstate.ra.ptr.p_double[2] = cs;
    state->rstate.ra.ptr.p_double[3] = sn;
    state->rstate.ra.ptr.p_double[4] = v;
    state->rstate.ra.ptr.p_double[5] = vv;
    state->rstate.ra.ptr.p_double[6] = anrm;
    state->rstate.ra.ptr.p_double[7] = qnrm;
    state->rstate.ra.ptr.p_double[8] = bnrm;
    state->rstate.ra.ptr.p_double[9] = resnrm;
    state->rstate.ra.ptr.p_double[10] = prevresnrm;
    return result;
}
57977 
57978 
57979 /*************************************************************************
57980 Fast  least  squares  solver,  solves  well  conditioned  system   without
57981 performing  any  checks  for  degeneracy,  and using user-provided buffers
57982 (which are automatically reallocated if too small).
57983 
57984 This  function  is  intended  for solution of moderately sized systems. It
57985 uses factorization algorithms based on Level 2 BLAS  operations,  thus  it
57986 won't work efficiently on large scale systems.
57987 
57988 INPUT PARAMETERS:
57989     A       -   array[M,N], system matrix.
57990                 Contents of A is destroyed during solution.
57991     B       -   array[M], right part
57992     M       -   number of equations
57993     N       -   number of variables, N<=M
57994     Tmp0, Tmp1, Tmp2-
57995                 buffers; function automatically allocates them, if they are
57996                 too  small. They can  be  reused  if  function  is   called
57997                 several times.
57998 
57999 OUTPUT PARAMETERS:
58000     B       -   solution (first N components, next M-N are zero)
58001 
58002   -- ALGLIB --
58003      Copyright 20.01.2012 by Bochkanov Sergey
58004 *************************************************************************/
fblssolvels(ae_matrix * a,ae_vector * b,ae_int_t m,ae_int_t n,ae_vector * tmp0,ae_vector * tmp1,ae_vector * tmp2,ae_state * _state)58005 void fblssolvels(/* Real    */ ae_matrix* a,
58006      /* Real    */ ae_vector* b,
58007      ae_int_t m,
58008      ae_int_t n,
58009      /* Real    */ ae_vector* tmp0,
58010      /* Real    */ ae_vector* tmp1,
58011      /* Real    */ ae_vector* tmp2,
58012      ae_state *_state)
58013 {
58014     ae_int_t i;
58015     ae_int_t k;
58016     double v;
58017 
58018 
58019     ae_assert(n>0, "FBLSSolveLS: N<=0", _state);
58020     ae_assert(m>=n, "FBLSSolveLS: M<N", _state);
58021     ae_assert(a->rows>=m, "FBLSSolveLS: Rows(A)<M", _state);
58022     ae_assert(a->cols>=n, "FBLSSolveLS: Cols(A)<N", _state);
58023     ae_assert(b->cnt>=m, "FBLSSolveLS: Length(B)<M", _state);
58024 
58025     /*
58026      * Allocate temporaries
58027      */
58028     rvectorsetlengthatleast(tmp0, ae_maxint(m, n, _state)+1, _state);
58029     rvectorsetlengthatleast(tmp1, ae_maxint(m, n, _state)+1, _state);
58030     rvectorsetlengthatleast(tmp2, ae_minint(m, n, _state), _state);
58031 
58032     /*
58033      * Call basecase QR
58034      */
58035     rmatrixqrbasecase(a, m, n, tmp0, tmp1, tmp2, _state);
58036 
58037     /*
58038      * Multiply B by Q'
58039      */
58040     for(k=0; k<=n-1; k++)
58041     {
58042         for(i=0; i<=k-1; i++)
58043         {
58044             tmp0->ptr.p_double[i] = (double)(0);
58045         }
58046         ae_v_move(&tmp0->ptr.p_double[k], 1, &a->ptr.pp_double[k][k], a->stride, ae_v_len(k,m-1));
58047         tmp0->ptr.p_double[k] = (double)(1);
58048         v = ae_v_dotproduct(&tmp0->ptr.p_double[k], 1, &b->ptr.p_double[k], 1, ae_v_len(k,m-1));
58049         v = v*tmp2->ptr.p_double[k];
58050         ae_v_subd(&b->ptr.p_double[k], 1, &tmp0->ptr.p_double[k], 1, ae_v_len(k,m-1), v);
58051     }
58052 
58053     /*
58054      * Solve triangular system
58055      */
58056     b->ptr.p_double[n-1] = b->ptr.p_double[n-1]/a->ptr.pp_double[n-1][n-1];
58057     for(i=n-2; i>=0; i--)
58058     {
58059         v = ae_v_dotproduct(&a->ptr.pp_double[i][i+1], 1, &b->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
58060         b->ptr.p_double[i] = (b->ptr.p_double[i]-v)/a->ptr.pp_double[i][i];
58061     }
58062     for(i=n; i<=m-1; i++)
58063     {
58064         b->ptr.p_double[i] = 0.0;
58065     }
58066 }
58067 
58068 
_fblslincgstate_init(void * _p,ae_state * _state,ae_bool make_automatic)58069 void _fblslincgstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
58070 {
58071     fblslincgstate *p = (fblslincgstate*)_p;
58072     ae_touch_ptr((void*)p);
58073     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
58074     ae_vector_init(&p->ax, 0, DT_REAL, _state, make_automatic);
58075     ae_vector_init(&p->rk, 0, DT_REAL, _state, make_automatic);
58076     ae_vector_init(&p->rk1, 0, DT_REAL, _state, make_automatic);
58077     ae_vector_init(&p->xk, 0, DT_REAL, _state, make_automatic);
58078     ae_vector_init(&p->xk1, 0, DT_REAL, _state, make_automatic);
58079     ae_vector_init(&p->pk, 0, DT_REAL, _state, make_automatic);
58080     ae_vector_init(&p->pk1, 0, DT_REAL, _state, make_automatic);
58081     ae_vector_init(&p->b, 0, DT_REAL, _state, make_automatic);
58082     _rcommstate_init(&p->rstate, _state, make_automatic);
58083     ae_vector_init(&p->tmp2, 0, DT_REAL, _state, make_automatic);
58084 }
58085 
58086 
_fblslincgstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)58087 void _fblslincgstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
58088 {
58089     fblslincgstate *dst = (fblslincgstate*)_dst;
58090     fblslincgstate *src = (fblslincgstate*)_src;
58091     dst->e1 = src->e1;
58092     dst->e2 = src->e2;
58093     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
58094     ae_vector_init_copy(&dst->ax, &src->ax, _state, make_automatic);
58095     dst->xax = src->xax;
58096     dst->n = src->n;
58097     ae_vector_init_copy(&dst->rk, &src->rk, _state, make_automatic);
58098     ae_vector_init_copy(&dst->rk1, &src->rk1, _state, make_automatic);
58099     ae_vector_init_copy(&dst->xk, &src->xk, _state, make_automatic);
58100     ae_vector_init_copy(&dst->xk1, &src->xk1, _state, make_automatic);
58101     ae_vector_init_copy(&dst->pk, &src->pk, _state, make_automatic);
58102     ae_vector_init_copy(&dst->pk1, &src->pk1, _state, make_automatic);
58103     ae_vector_init_copy(&dst->b, &src->b, _state, make_automatic);
58104     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
58105     ae_vector_init_copy(&dst->tmp2, &src->tmp2, _state, make_automatic);
58106 }
58107 
58108 
_fblslincgstate_clear(void * _p)58109 void _fblslincgstate_clear(void* _p)
58110 {
58111     fblslincgstate *p = (fblslincgstate*)_p;
58112     ae_touch_ptr((void*)p);
58113     ae_vector_clear(&p->x);
58114     ae_vector_clear(&p->ax);
58115     ae_vector_clear(&p->rk);
58116     ae_vector_clear(&p->rk1);
58117     ae_vector_clear(&p->xk);
58118     ae_vector_clear(&p->xk1);
58119     ae_vector_clear(&p->pk);
58120     ae_vector_clear(&p->pk1);
58121     ae_vector_clear(&p->b);
58122     _rcommstate_clear(&p->rstate);
58123     ae_vector_clear(&p->tmp2);
58124 }
58125 
58126 
_fblslincgstate_destroy(void * _p)58127 void _fblslincgstate_destroy(void* _p)
58128 {
58129     fblslincgstate *p = (fblslincgstate*)_p;
58130     ae_touch_ptr((void*)p);
58131     ae_vector_destroy(&p->x);
58132     ae_vector_destroy(&p->ax);
58133     ae_vector_destroy(&p->rk);
58134     ae_vector_destroy(&p->rk1);
58135     ae_vector_destroy(&p->xk);
58136     ae_vector_destroy(&p->xk1);
58137     ae_vector_destroy(&p->pk);
58138     ae_vector_destroy(&p->pk1);
58139     ae_vector_destroy(&p->b);
58140     _rcommstate_destroy(&p->rstate);
58141     ae_vector_destroy(&p->tmp2);
58142 }
58143 
58144 
_fblsgmresstate_init(void * _p,ae_state * _state,ae_bool make_automatic)58145 void _fblsgmresstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
58146 {
58147     fblsgmresstate *p = (fblsgmresstate*)_p;
58148     ae_touch_ptr((void*)p);
58149     ae_vector_init(&p->b, 0, DT_REAL, _state, make_automatic);
58150     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
58151     ae_vector_init(&p->ax, 0, DT_REAL, _state, make_automatic);
58152     ae_vector_init(&p->xs, 0, DT_REAL, _state, make_automatic);
58153     ae_matrix_init(&p->qi, 0, 0, DT_REAL, _state, make_automatic);
58154     ae_matrix_init(&p->aqi, 0, 0, DT_REAL, _state, make_automatic);
58155     ae_matrix_init(&p->h, 0, 0, DT_REAL, _state, make_automatic);
58156     ae_matrix_init(&p->hq, 0, 0, DT_REAL, _state, make_automatic);
58157     ae_matrix_init(&p->hr, 0, 0, DT_REAL, _state, make_automatic);
58158     ae_vector_init(&p->hqb, 0, DT_REAL, _state, make_automatic);
58159     ae_vector_init(&p->ys, 0, DT_REAL, _state, make_automatic);
58160     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
58161     ae_vector_init(&p->tmp1, 0, DT_REAL, _state, make_automatic);
58162     _rcommstate_init(&p->rstate, _state, make_automatic);
58163 }
58164 
58165 
_fblsgmresstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)58166 void _fblsgmresstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
58167 {
58168     fblsgmresstate *dst = (fblsgmresstate*)_dst;
58169     fblsgmresstate *src = (fblsgmresstate*)_src;
58170     ae_vector_init_copy(&dst->b, &src->b, _state, make_automatic);
58171     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
58172     ae_vector_init_copy(&dst->ax, &src->ax, _state, make_automatic);
58173     ae_vector_init_copy(&dst->xs, &src->xs, _state, make_automatic);
58174     ae_matrix_init_copy(&dst->qi, &src->qi, _state, make_automatic);
58175     ae_matrix_init_copy(&dst->aqi, &src->aqi, _state, make_automatic);
58176     ae_matrix_init_copy(&dst->h, &src->h, _state, make_automatic);
58177     ae_matrix_init_copy(&dst->hq, &src->hq, _state, make_automatic);
58178     ae_matrix_init_copy(&dst->hr, &src->hr, _state, make_automatic);
58179     ae_vector_init_copy(&dst->hqb, &src->hqb, _state, make_automatic);
58180     ae_vector_init_copy(&dst->ys, &src->ys, _state, make_automatic);
58181     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
58182     ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic);
58183     dst->n = src->n;
58184     dst->itscnt = src->itscnt;
58185     dst->epsort = src->epsort;
58186     dst->epsres = src->epsres;
58187     dst->epsred = src->epsred;
58188     dst->epsdiag = src->epsdiag;
58189     dst->itsperformed = src->itsperformed;
58190     dst->retcode = src->retcode;
58191     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
58192 }
58193 
58194 
_fblsgmresstate_clear(void * _p)58195 void _fblsgmresstate_clear(void* _p)
58196 {
58197     fblsgmresstate *p = (fblsgmresstate*)_p;
58198     ae_touch_ptr((void*)p);
58199     ae_vector_clear(&p->b);
58200     ae_vector_clear(&p->x);
58201     ae_vector_clear(&p->ax);
58202     ae_vector_clear(&p->xs);
58203     ae_matrix_clear(&p->qi);
58204     ae_matrix_clear(&p->aqi);
58205     ae_matrix_clear(&p->h);
58206     ae_matrix_clear(&p->hq);
58207     ae_matrix_clear(&p->hr);
58208     ae_vector_clear(&p->hqb);
58209     ae_vector_clear(&p->ys);
58210     ae_vector_clear(&p->tmp0);
58211     ae_vector_clear(&p->tmp1);
58212     _rcommstate_clear(&p->rstate);
58213 }
58214 
58215 
_fblsgmresstate_destroy(void * _p)58216 void _fblsgmresstate_destroy(void* _p)
58217 {
58218     fblsgmresstate *p = (fblsgmresstate*)_p;
58219     ae_touch_ptr((void*)p);
58220     ae_vector_destroy(&p->b);
58221     ae_vector_destroy(&p->x);
58222     ae_vector_destroy(&p->ax);
58223     ae_vector_destroy(&p->xs);
58224     ae_matrix_destroy(&p->qi);
58225     ae_matrix_destroy(&p->aqi);
58226     ae_matrix_destroy(&p->h);
58227     ae_matrix_destroy(&p->hq);
58228     ae_matrix_destroy(&p->hr);
58229     ae_vector_destroy(&p->hqb);
58230     ae_vector_destroy(&p->ys);
58231     ae_vector_destroy(&p->tmp0);
58232     ae_vector_destroy(&p->tmp1);
58233     _rcommstate_destroy(&p->rstate);
58234 }
58235 
58236 
58237 #endif
58238 #if defined(AE_COMPILE_NORMESTIMATOR) || !defined(AE_PARTIAL_BUILD)
58239 
58240 
58241 /*************************************************************************
58242 This procedure initializes matrix norm estimator.
58243 
58244 USAGE:
58245 1. User initializes algorithm state with NormEstimatorCreate() call
58246 2. User calls NormEstimatorEstimateSparse() (or NormEstimatorIteration())
58247 3. User calls NormEstimatorResults() to get solution.
58248 
58249 INPUT PARAMETERS:
58250     M       -   number of rows in the matrix being estimated, M>0
58251     N       -   number of columns in the matrix being estimated, N>0
58252     NStart  -   number of random starting vectors
58253                 recommended value - at least 5.
58254     NIts    -   number of iterations to do with best starting vector
58255                 recommended value - at least 5.
58256 
58257 OUTPUT PARAMETERS:
58258     State   -   structure which stores algorithm state
58259 
58260 
58261 NOTE: this algorithm is effectively deterministic, i.e. it always  returns
58262 same result when repeatedly called for the same matrix. In fact, algorithm
58263 uses randomized starting vectors, but internal  random  numbers  generator
58264 always generates same sequence of the random values (it is a  feature, not
58265 bug).
58266 
58267 Algorithm can be made non-deterministic with NormEstimatorSetSeed(0) call.
58268 
58269   -- ALGLIB --
58270      Copyright 06.12.2011 by Bochkanov Sergey
58271 *************************************************************************/
normestimatorcreate(ae_int_t m,ae_int_t n,ae_int_t nstart,ae_int_t nits,normestimatorstate * state,ae_state * _state)58272 void normestimatorcreate(ae_int_t m,
58273      ae_int_t n,
58274      ae_int_t nstart,
58275      ae_int_t nits,
58276      normestimatorstate* state,
58277      ae_state *_state)
58278 {
58279 
58280     _normestimatorstate_clear(state);
58281 
58282     ae_assert(m>0, "NormEstimatorCreate: M<=0", _state);
58283     ae_assert(n>0, "NormEstimatorCreate: N<=0", _state);
58284     ae_assert(nstart>0, "NormEstimatorCreate: NStart<=0", _state);
58285     ae_assert(nits>0, "NormEstimatorCreate: NIts<=0", _state);
58286     state->m = m;
58287     state->n = n;
58288     state->nstart = nstart;
58289     state->nits = nits;
58290     state->seedval = 11;
58291     hqrndrandomize(&state->r, _state);
58292     ae_vector_set_length(&state->x0, state->n, _state);
58293     ae_vector_set_length(&state->t, state->m, _state);
58294     ae_vector_set_length(&state->x1, state->n, _state);
58295     ae_vector_set_length(&state->xbest, state->n, _state);
58296     ae_vector_set_length(&state->x, ae_maxint(state->n, state->m, _state), _state);
58297     ae_vector_set_length(&state->mv, state->m, _state);
58298     ae_vector_set_length(&state->mtv, state->n, _state);
58299     ae_vector_set_length(&state->rstate.ia, 3+1, _state);
58300     ae_vector_set_length(&state->rstate.ra, 2+1, _state);
58301     state->rstate.stage = -1;
58302 }
58303 
58304 
58305 /*************************************************************************
58306 This function changes seed value used by algorithm. In some cases we  need
58307 deterministic processing, i.e. subsequent calls must return equal results,
58308 in other cases we need non-deterministic algorithm which returns different
58309 results for the same matrix on every pass.
58310 
58311 Setting zero seed will lead to non-deterministic algorithm, while non-zero
58312 value will make our algorithm deterministic.
58313 
58314 INPUT PARAMETERS:
58315     State       -   norm estimator state, must be initialized with a  call
58316                     to NormEstimatorCreate()
58317     SeedVal     -   seed value, >=0. Zero value = non-deterministic algo.
58318 
58319   -- ALGLIB --
58320      Copyright 06.12.2011 by Bochkanov Sergey
58321 *************************************************************************/
normestimatorsetseed(normestimatorstate * state,ae_int_t seedval,ae_state * _state)58322 void normestimatorsetseed(normestimatorstate* state,
58323      ae_int_t seedval,
58324      ae_state *_state)
58325 {
58326 
58327 
58328     ae_assert(seedval>=0, "NormEstimatorSetSeed: SeedVal<0", _state);
58329     state->seedval = seedval;
58330 }
58331 
58332 
58333 /*************************************************************************
58334 
58335   -- ALGLIB --
58336      Copyright 06.12.2011 by Bochkanov Sergey
58337 *************************************************************************/
ae_bool normestimatoriteration(normestimatorstate* state,
     ae_state *_state)
{
    ae_int_t n;
    ae_int_t m;
    ae_int_t i;
    ae_int_t itcnt;
    double v;
    double growth;
    double bestgrowth;
    ae_bool result;



    /*
     * Reverse communication preparations
     * I know it looks ugly, but it works the same way
     * anywhere from C++ to Python.
     *
     * This code initializes locals by:
     * * random values determined during code
     *   generation - on first subroutine call
     * * values from previous call - on subsequent calls
     */
    if( state->rstate.stage>=0 )
    {
        /* resuming: restore all locals saved at the last lbl_rcomm exit */
        n = state->rstate.ia.ptr.p_int[0];
        m = state->rstate.ia.ptr.p_int[1];
        i = state->rstate.ia.ptr.p_int[2];
        itcnt = state->rstate.ia.ptr.p_int[3];
        v = state->rstate.ra.ptr.p_double[0];
        growth = state->rstate.ra.ptr.p_double[1];
        bestgrowth = state->rstate.ra.ptr.p_double[2];
    }
    else
    {
        /* first call: arbitrary generated values, overwritten before use */
        n = 359;
        m = -58;
        i = -919;
        itcnt = -909;
        v = 81;
        growth = 255;
        bestgrowth = 74;
    }
    /* dispatch to the point right after the request we suspended on */
    if( state->rstate.stage==0 )
    {
        goto lbl_0;
    }
    if( state->rstate.stage==1 )
    {
        goto lbl_1;
    }
    if( state->rstate.stage==2 )
    {
        goto lbl_2;
    }
    if( state->rstate.stage==3 )
    {
        goto lbl_3;
    }

    /*
     * Routine body
     */
    n = state->n;
    m = state->m;
    if( state->seedval>0 )
    {
        /* positive seed => reseed RNG for deterministic results */
        hqrndseed(state->seedval, state->seedval+2, &state->r, _state);
    }
    bestgrowth = (double)(0);
    state->xbest.ptr.p_double[0] = (double)(1);
    for(i=1; i<=n-1; i++)
    {
        state->xbest.ptr.p_double[i] = (double)(0);
    }
    itcnt = 0;
    /*
     * Phase 1 (lbl_4..lbl_6): try NStart random unit vectors, measuring the
     * growth |A'(A*x)|^(1/2) of each; keep the best-growing direction.
     */
lbl_4:
    if( itcnt>state->nstart-1 )
    {
        goto lbl_6;
    }
    /* draw a random Gaussian vector, rejecting the (improbable) zero vector */
    do
    {
        v = (double)(0);
        for(i=0; i<=n-1; i++)
        {
            state->x0.ptr.p_double[i] = hqrndnormal(&state->r, _state);
            v = v+ae_sqr(state->x0.ptr.p_double[i], _state);
        }
    }
    while(ae_fp_eq(v,(double)(0)));
    /* normalize to unit length */
    v = 1/ae_sqrt(v, _state);
    ae_v_muld(&state->x0.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
    /* request MV = A*x from the caller */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->x0.ptr.p_double[0], 1, ae_v_len(0,n-1));
    state->needmv = ae_true;
    state->needmtv = ae_false;
    state->rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    /* request MTV = A'*(A*x) from the caller */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->mv.ptr.p_double[0], 1, ae_v_len(0,m-1));
    state->needmv = ae_false;
    state->needmtv = ae_true;
    state->rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
    ae_v_move(&state->x1.ptr.p_double[0], 1, &state->mtv.ptr.p_double[0], 1, ae_v_len(0,n-1));
    v = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = v+ae_sqr(state->x1.ptr.p_double[i], _state);
    }
    /* growth = |A'*A*x|^(1/2) approximates |A| for unit x */
    growth = ae_sqrt(ae_sqrt(v, _state), _state);
    if( ae_fp_greater(growth,bestgrowth) )
    {
        /* new best starting direction: store normalized A'*A*x */
        v = 1/ae_sqrt(v, _state);
        ae_v_moved(&state->xbest.ptr.p_double[0], 1, &state->x1.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
        bestgrowth = growth;
    }
    itcnt = itcnt+1;
    goto lbl_4;
lbl_6:
    /*
     * Phase 2 (lbl_7..lbl_9): NIts rounds of power iteration on A'*A,
     * starting from the best vector found in phase 1.
     */
    ae_v_move(&state->x0.ptr.p_double[0], 1, &state->xbest.ptr.p_double[0], 1, ae_v_len(0,n-1));
    itcnt = 0;
lbl_7:
    if( itcnt>state->nits-1 )
    {
        goto lbl_9;
    }
    /* request MV = A*x0 */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->x0.ptr.p_double[0], 1, ae_v_len(0,n-1));
    state->needmv = ae_true;
    state->needmtv = ae_false;
    state->rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:
    /* request MTV = A'*(A*x0) */
    ae_v_move(&state->x.ptr.p_double[0], 1, &state->mv.ptr.p_double[0], 1, ae_v_len(0,m-1));
    state->needmv = ae_false;
    state->needmtv = ae_true;
    state->rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
    ae_v_move(&state->x1.ptr.p_double[0], 1, &state->mtv.ptr.p_double[0], 1, ae_v_len(0,n-1));
    v = (double)(0);
    for(i=0; i<=n-1; i++)
    {
        v = v+ae_sqr(state->x1.ptr.p_double[i], _state);
    }
    /* current norm estimate: |A'*A*x0|^(1/2) for unit x0 */
    state->repnorm = ae_sqrt(ae_sqrt(v, _state), _state);
    if( ae_fp_neq(v,(double)(0)) )
    {
        /* normalize and continue iterating (skip if vector degenerated to zero) */
        v = 1/ae_sqrt(v, _state);
        ae_v_moved(&state->x0.ptr.p_double[0], 1, &state->x1.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
    }
    itcnt = itcnt+1;
    goto lbl_7;
lbl_9:
    /* done: ae_false tells the caller no more products are needed */
    result = ae_false;
    return result;

    /*
     * Saving state
     */
lbl_rcomm:
    result = ae_true;
    state->rstate.ia.ptr.p_int[0] = n;
    state->rstate.ia.ptr.p_int[1] = m;
    state->rstate.ia.ptr.p_int[2] = i;
    state->rstate.ia.ptr.p_int[3] = itcnt;
    state->rstate.ra.ptr.p_double[0] = v;
    state->rstate.ra.ptr.p_double[1] = growth;
    state->rstate.ra.ptr.p_double[2] = bestgrowth;
    return result;
}
58511 
58512 
58513 /*************************************************************************
58514 This function estimates norm of the sparse M*N matrix A.
58515 
58516 INPUT PARAMETERS:
58517     State       -   norm estimator state, must be initialized with a  call
58518                     to NormEstimatorCreate()
58519     A           -   sparse M*N matrix, must be converted to CRS format
58520                     prior to calling this function.
58521 
58522 After this function  is  over  you can call NormEstimatorResults() to get
58523 estimate of the norm(A).
58524 
58525   -- ALGLIB --
58526      Copyright 06.12.2011 by Bochkanov Sergey
58527 *************************************************************************/
normestimatorestimatesparse(normestimatorstate * state,sparsematrix * a,ae_state * _state)58528 void normestimatorestimatesparse(normestimatorstate* state,
58529      sparsematrix* a,
58530      ae_state *_state)
58531 {
58532 
58533 
58534     normestimatorrestart(state, _state);
58535     while(normestimatoriteration(state, _state))
58536     {
58537         if( state->needmv )
58538         {
58539             sparsemv(a, &state->x, &state->mv, _state);
58540             continue;
58541         }
58542         if( state->needmtv )
58543         {
58544             sparsemtv(a, &state->x, &state->mtv, _state);
58545             continue;
58546         }
58547     }
58548 }
58549 
58550 
58551 /*************************************************************************
58552 Matrix norm estimation results
58553 
58554 INPUT PARAMETERS:
58555     State   -   algorithm state
58556 
58557 OUTPUT PARAMETERS:
58558     Nrm     -   estimate of the matrix norm, Nrm>=0
58559 
58560   -- ALGLIB --
58561      Copyright 06.12.2011 by Bochkanov Sergey
58562 *************************************************************************/
normestimatorresults(normestimatorstate * state,double * nrm,ae_state * _state)58563 void normestimatorresults(normestimatorstate* state,
58564      double* nrm,
58565      ae_state *_state)
58566 {
58567 
58568     *nrm = 0;
58569 
58570     *nrm = state->repnorm;
58571 }
58572 
58573 
58574 /*************************************************************************
58575 This  function  restarts estimator and prepares it for the next estimation
58576 round.
58577 
58578 INPUT PARAMETERS:
58579     State   -   algorithm state
58580   -- ALGLIB --
58581      Copyright 06.12.2011 by Bochkanov Sergey
58582 *************************************************************************/
normestimatorrestart(normestimatorstate * state,ae_state * _state)58583 void normestimatorrestart(normestimatorstate* state, ae_state *_state)
58584 {
58585 
58586 
58587     ae_vector_set_length(&state->rstate.ia, 3+1, _state);
58588     ae_vector_set_length(&state->rstate.ra, 2+1, _state);
58589     state->rstate.stage = -1;
58590 }
58591 
58592 
_normestimatorstate_init(void * _p,ae_state * _state,ae_bool make_automatic)58593 void _normestimatorstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
58594 {
58595     normestimatorstate *p = (normestimatorstate*)_p;
58596     ae_touch_ptr((void*)p);
58597     ae_vector_init(&p->x0, 0, DT_REAL, _state, make_automatic);
58598     ae_vector_init(&p->x1, 0, DT_REAL, _state, make_automatic);
58599     ae_vector_init(&p->t, 0, DT_REAL, _state, make_automatic);
58600     ae_vector_init(&p->xbest, 0, DT_REAL, _state, make_automatic);
58601     _hqrndstate_init(&p->r, _state, make_automatic);
58602     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
58603     ae_vector_init(&p->mv, 0, DT_REAL, _state, make_automatic);
58604     ae_vector_init(&p->mtv, 0, DT_REAL, _state, make_automatic);
58605     _rcommstate_init(&p->rstate, _state, make_automatic);
58606 }
58607 
58608 
_normestimatorstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)58609 void _normestimatorstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
58610 {
58611     normestimatorstate *dst = (normestimatorstate*)_dst;
58612     normestimatorstate *src = (normestimatorstate*)_src;
58613     dst->n = src->n;
58614     dst->m = src->m;
58615     dst->nstart = src->nstart;
58616     dst->nits = src->nits;
58617     dst->seedval = src->seedval;
58618     ae_vector_init_copy(&dst->x0, &src->x0, _state, make_automatic);
58619     ae_vector_init_copy(&dst->x1, &src->x1, _state, make_automatic);
58620     ae_vector_init_copy(&dst->t, &src->t, _state, make_automatic);
58621     ae_vector_init_copy(&dst->xbest, &src->xbest, _state, make_automatic);
58622     _hqrndstate_init_copy(&dst->r, &src->r, _state, make_automatic);
58623     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
58624     ae_vector_init_copy(&dst->mv, &src->mv, _state, make_automatic);
58625     ae_vector_init_copy(&dst->mtv, &src->mtv, _state, make_automatic);
58626     dst->needmv = src->needmv;
58627     dst->needmtv = src->needmtv;
58628     dst->repnorm = src->repnorm;
58629     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
58630 }
58631 
58632 
_normestimatorstate_clear(void * _p)58633 void _normestimatorstate_clear(void* _p)
58634 {
58635     normestimatorstate *p = (normestimatorstate*)_p;
58636     ae_touch_ptr((void*)p);
58637     ae_vector_clear(&p->x0);
58638     ae_vector_clear(&p->x1);
58639     ae_vector_clear(&p->t);
58640     ae_vector_clear(&p->xbest);
58641     _hqrndstate_clear(&p->r);
58642     ae_vector_clear(&p->x);
58643     ae_vector_clear(&p->mv);
58644     ae_vector_clear(&p->mtv);
58645     _rcommstate_clear(&p->rstate);
58646 }
58647 
58648 
_normestimatorstate_destroy(void * _p)58649 void _normestimatorstate_destroy(void* _p)
58650 {
58651     normestimatorstate *p = (normestimatorstate*)_p;
58652     ae_touch_ptr((void*)p);
58653     ae_vector_destroy(&p->x0);
58654     ae_vector_destroy(&p->x1);
58655     ae_vector_destroy(&p->t);
58656     ae_vector_destroy(&p->xbest);
58657     _hqrndstate_destroy(&p->r);
58658     ae_vector_destroy(&p->x);
58659     ae_vector_destroy(&p->mv);
58660     ae_vector_destroy(&p->mtv);
58661     _rcommstate_destroy(&p->rstate);
58662 }
58663 
58664 
58665 #endif
58666 #if defined(AE_COMPILE_MATINV) || !defined(AE_PARTIAL_BUILD)
58667 
58668 
58669 /*************************************************************************
58670 Inversion of a matrix given by its LU decomposition.
58671 
58672 INPUT PARAMETERS:
58673     A       -   LU decomposition of the matrix
58674                 (output of RMatrixLU subroutine).
58675     Pivots  -   table of permutations
58676                 (the output of RMatrixLU subroutine).
58677     N       -   size of matrix A (optional) :
58678                 * if given, only principal NxN submatrix is processed  and
58679                   overwritten. other elements are unchanged.
58680                 * if not given,  size  is  automatically  determined  from
58681                   matrix size (A must be square matrix)
58682 
58683 OUTPUT PARAMETERS:
58684     Info    -   return code:
58685                 * -3    A is singular, or VERY close to singular.
58686                         it is filled by zeros in such cases.
58687                 *  1    task is solved (but matrix A may be ill-conditioned,
58688                         check R1/RInf parameters for condition numbers).
58689     Rep     -   solver report, see below for more info
58690     A       -   inverse of matrix A.
58691                 Array whose indexes range within [0..N-1, 0..N-1].
58692 
58693 SOLVER REPORT
58694 
58695 Subroutine sets following fields of the Rep structure:
58696 * R1        reciprocal of condition number: 1/cond(A), 1-norm.
58697 * RInf      reciprocal of condition number: 1/cond(A), inf-norm.
58698 
58699   ! FREE EDITION OF ALGLIB:
58700   !
58701   ! Free Edition of ALGLIB supports following important features for  this
58702   ! function:
58703   ! * C++ version: x64 SIMD support using C++ intrinsics
58704   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
58705   !
58706   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
58707   ! Reference Manual in order  to  find  out  how to activate SIMD support
58708   ! in ALGLIB.
58709 
58710   ! COMMERCIAL EDITION OF ALGLIB:
58711   !
58712   ! Commercial Edition of ALGLIB includes following important improvements
58713   ! of this function:
58714   ! * high-performance native backend with same C# interface (C# version)
58715   ! * multithreading support (C++ and C# versions)
58716   ! * hardware vendor (Intel) implementations of linear algebra primitives
58717   !   (C++ and C# versions, x86/x64 platform)
58718   !
58719   ! We recommend you to read 'Working with commercial version' section  of
58720   ! ALGLIB Reference Manual in order to find out how to  use  performance-
58721   ! related features provided by commercial edition of ALGLIB.
58722 
58723   -- ALGLIB routine --
58724      05.02.2010
58725      Bochkanov Sergey
58726 *************************************************************************/
void rmatrixluinverse(/* Real    */ ae_matrix* a,
     /* Integer */ ae_vector* pivots,
     ae_int_t n,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    double v;
    sinteger sinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&sinfo, 0, sizeof(sinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    _sinteger_init(&sinfo, _state, ae_true);

    /* validate dimensions and finiteness of the LU factors */
    ae_assert(n>0, "RMatrixLUInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "RMatrixLUInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "RMatrixLUInverse: rows(A)<N!", _state);
    ae_assert(pivots->cnt>=n, "RMatrixLUInverse: len(Pivots)<N!", _state);
    ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    /* each pivot index must satisfy i <= Pivots[i] <= n-1 (partial-pivoting invariant) */
    for(i=0; i<=n-1; i++)
    {
        if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i )
        {
            *info = -1;
        }
    }
    ae_assert(*info>0, "RMatrixLUInverse: incorrect Pivots array!", _state);

    /*
     * calculate condition numbers
     */
    rep->r1 = rmatrixlurcond1(a, n, _state);
    rep->rinf = rmatrixlurcondinf(a, n, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {
        /* matrix is (nearly) singular: zero-fill output and report Info=-3 */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                a->ptr.pp_double[i][j] = (double)(0);
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Call cache-oblivious code
     */
    ae_vector_set_length(&work, n, _state);
    sinfo.val = 1;
    matinv_rmatrixluinverserec(a, 0, n, &work, &sinfo, rep, _state);
    *info = sinfo.val;

    /*
     * apply permutations
     */
    /* undo the LU row swaps by swapping COLUMNS of inv(A), from last pivot to first */
    for(i=0; i<=n-1; i++)
    {
        for(j=n-2; j>=0; j--)
        {
            k = pivots->ptr.p_int[j];
            v = a->ptr.pp_double[i][j];
            a->ptr.pp_double[i][j] = a->ptr.pp_double[i][k];
            a->ptr.pp_double[i][k] = v;
        }
    }
    ae_frame_leave(_state);
}
58809 
58810 
58811 /*************************************************************************
58812 Inversion of a general matrix.
58813 
58814 Input parameters:
58815     A       -   matrix.
58816     N       -   size of matrix A (optional) :
58817                 * if given, only principal NxN submatrix is processed  and
58818                   overwritten. other elements are unchanged.
58819                 * if not given,  size  is  automatically  determined  from
58820                   matrix size (A must be square matrix)
58821 
58822 Output parameters:
58823     Info    -   return code, same as in RMatrixLUInverse
58824     Rep     -   solver report, same as in RMatrixLUInverse
58825     A       -   inverse of matrix A, same as in RMatrixLUInverse
58826 
58827 Result:
58828     True, if the matrix is not singular.
58829     False, if the matrix is singular.
58830 
58831   ! FREE EDITION OF ALGLIB:
58832   !
58833   ! Free Edition of ALGLIB supports following important features for  this
58834   ! function:
58835   ! * C++ version: x64 SIMD support using C++ intrinsics
58836   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
58837   !
58838   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
58839   ! Reference Manual in order  to  find  out  how to activate SIMD support
58840   ! in ALGLIB.
58841 
58842   ! COMMERCIAL EDITION OF ALGLIB:
58843   !
58844   ! Commercial Edition of ALGLIB includes following important improvements
58845   ! of this function:
58846   ! * high-performance native backend with same C# interface (C# version)
58847   ! * multithreading support (C++ and C# versions)
58848   ! * hardware vendor (Intel) implementations of linear algebra primitives
58849   !   (C++ and C# versions, x86/x64 platform)
58850   !
58851   ! We recommend you to read 'Working with commercial version' section  of
58852   ! ALGLIB Reference Manual in order to find out how to  use  performance-
58853   ! related features provided by commercial edition of ALGLIB.
58854 
58855   -- ALGLIB --
58856      Copyright 2005-2010 by Bochkanov Sergey
58857 *************************************************************************/
rmatrixinverse(ae_matrix * a,ae_int_t n,ae_int_t * info,matinvreport * rep,ae_state * _state)58858 void rmatrixinverse(/* Real    */ ae_matrix* a,
58859      ae_int_t n,
58860      ae_int_t* info,
58861      matinvreport* rep,
58862      ae_state *_state)
58863 {
58864     ae_frame _frame_block;
58865     ae_vector pivots;
58866 
58867     ae_frame_make(_state, &_frame_block);
58868     memset(&pivots, 0, sizeof(pivots));
58869     *info = 0;
58870     _matinvreport_clear(rep);
58871     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
58872 
58873     ae_assert(n>0, "RMatrixInverse: N<=0!", _state);
58874     ae_assert(a->cols>=n, "RMatrixInverse: cols(A)<N!", _state);
58875     ae_assert(a->rows>=n, "RMatrixInverse: rows(A)<N!", _state);
58876     ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixInverse: A contains infinite or NaN values!", _state);
58877     rmatrixlu(a, n, n, &pivots, _state);
58878     rmatrixluinverse(a, &pivots, n, info, rep, _state);
58879     ae_frame_leave(_state);
58880 }
58881 
58882 
58883 /*************************************************************************
58884 Inversion of a matrix given by its LU decomposition.
58885 
58886 INPUT PARAMETERS:
58887     A       -   LU decomposition of the matrix
58888                 (output of CMatrixLU subroutine).
58889     Pivots  -   table of permutations
58890                 (the output of CMatrixLU subroutine).
58891     N       -   size of matrix A (optional) :
58892                 * if given, only principal NxN submatrix is processed  and
58893                   overwritten. other elements are unchanged.
58894                 * if not given,  size  is  automatically  determined  from
58895                   matrix size (A must be square matrix)
58896 
58897 OUTPUT PARAMETERS:
58898     Info    -   return code, same as in RMatrixLUInverse
58899     Rep     -   solver report, same as in RMatrixLUInverse
58900     A       -   inverse of matrix A, same as in RMatrixLUInverse
58901 
58902   ! FREE EDITION OF ALGLIB:
58903   !
58904   ! Free Edition of ALGLIB supports following important features for  this
58905   ! function:
58906   ! * C++ version: x64 SIMD support using C++ intrinsics
58907   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
58908   !
58909   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
58910   ! Reference Manual in order  to  find  out  how to activate SIMD support
58911   ! in ALGLIB.
58912 
58913   ! COMMERCIAL EDITION OF ALGLIB:
58914   !
58915   ! Commercial Edition of ALGLIB includes following important improvements
58916   ! of this function:
58917   ! * high-performance native backend with same C# interface (C# version)
58918   ! * multithreading support (C++ and C# versions)
58919   ! * hardware vendor (Intel) implementations of linear algebra primitives
58920   !   (C++ and C# versions, x86/x64 platform)
58921   !
58922   ! We recommend you to read 'Working with commercial version' section  of
58923   ! ALGLIB Reference Manual in order to find out how to  use  performance-
58924   ! related features provided by commercial edition of ALGLIB.
58925 
58926   -- ALGLIB routine --
58927      05.02.2010
58928      Bochkanov Sergey
58929 *************************************************************************/
void cmatrixluinverse(/* Complex */ ae_matrix* a,
     /* Integer */ ae_vector* pivots,
     ae_int_t n,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector work;
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_complex v;
    sinteger sinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&sinfo, 0, sizeof(sinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    _sinteger_init(&sinfo, _state, ae_true);

    /* validate dimensions and finiteness of the LU factors */
    ae_assert(n>0, "CMatrixLUInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "CMatrixLUInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "CMatrixLUInverse: rows(A)<N!", _state);
    ae_assert(pivots->cnt>=n, "CMatrixLUInverse: len(Pivots)<N!", _state);
    ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    /* each pivot index must satisfy i <= Pivots[i] <= n-1 (partial-pivoting invariant) */
    for(i=0; i<=n-1; i++)
    {
        if( pivots->ptr.p_int[i]>n-1||pivots->ptr.p_int[i]<i )
        {
            *info = -1;
        }
    }
    ae_assert(*info>0, "CMatrixLUInverse: incorrect Pivots array!", _state);

    /*
     * calculate condition numbers
     */
    rep->r1 = cmatrixlurcond1(a, n, _state);
    rep->rinf = cmatrixlurcondinf(a, n, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {
        /* matrix is (nearly) singular: zero-fill output and report Info=-3 */
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=n-1; j++)
            {
                a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Call cache-oblivious code
     */
    ae_vector_set_length(&work, n, _state);
    sinfo.val = 1;
    matinv_cmatrixluinverserec(a, 0, n, &work, &sinfo, rep, _state);
    *info = sinfo.val;

    /*
     * apply permutations
     */
    /* undo the LU row swaps by swapping COLUMNS of inv(A), from last pivot to first */
    for(i=0; i<=n-1; i++)
    {
        for(j=n-2; j>=0; j--)
        {
            k = pivots->ptr.p_int[j];
            v = a->ptr.pp_complex[i][j];
            a->ptr.pp_complex[i][j] = a->ptr.pp_complex[i][k];
            a->ptr.pp_complex[i][k] = v;
        }
    }
    ae_frame_leave(_state);
}
59012 
59013 
59014 /*************************************************************************
59015 Inversion of a general matrix.
59016 
59017 Input parameters:
59018     A       -   matrix
59019     N       -   size of matrix A (optional) :
59020                 * if given, only principal NxN submatrix is processed  and
59021                   overwritten. other elements are unchanged.
59022                 * if not given,  size  is  automatically  determined  from
59023                   matrix size (A must be square matrix)
59024 
59025 Output parameters:
59026     Info    -   return code, same as in RMatrixLUInverse
59027     Rep     -   solver report, same as in RMatrixLUInverse
59028     A       -   inverse of matrix A, same as in RMatrixLUInverse
59029 
59030   ! FREE EDITION OF ALGLIB:
59031   !
59032   ! Free Edition of ALGLIB supports following important features for  this
59033   ! function:
59034   ! * C++ version: x64 SIMD support using C++ intrinsics
59035   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59036   !
59037   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59038   ! Reference Manual in order  to  find  out  how to activate SIMD support
59039   ! in ALGLIB.
59040 
59041   ! COMMERCIAL EDITION OF ALGLIB:
59042   !
59043   ! Commercial Edition of ALGLIB includes following important improvements
59044   ! of this function:
59045   ! * high-performance native backend with same C# interface (C# version)
59046   ! * multithreading support (C++ and C# versions)
59047   ! * hardware vendor (Intel) implementations of linear algebra primitives
59048   !   (C++ and C# versions, x86/x64 platform)
59049   !
59050   ! We recommend you to read 'Working with commercial version' section  of
59051   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59052   ! related features provided by commercial edition of ALGLIB.
59053 
59054   -- ALGLIB --
59055      Copyright 2005 by Bochkanov Sergey
59056 *************************************************************************/
cmatrixinverse(ae_matrix * a,ae_int_t n,ae_int_t * info,matinvreport * rep,ae_state * _state)59057 void cmatrixinverse(/* Complex */ ae_matrix* a,
59058      ae_int_t n,
59059      ae_int_t* info,
59060      matinvreport* rep,
59061      ae_state *_state)
59062 {
59063     ae_frame _frame_block;
59064     ae_vector pivots;
59065 
59066     ae_frame_make(_state, &_frame_block);
59067     memset(&pivots, 0, sizeof(pivots));
59068     *info = 0;
59069     _matinvreport_clear(rep);
59070     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
59071 
59072     ae_assert(n>0, "CRMatrixInverse: N<=0!", _state);
59073     ae_assert(a->cols>=n, "CRMatrixInverse: cols(A)<N!", _state);
59074     ae_assert(a->rows>=n, "CRMatrixInverse: rows(A)<N!", _state);
59075     ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixInverse: A contains infinite or NaN values!", _state);
59076     cmatrixlu(a, n, n, &pivots, _state);
59077     cmatrixluinverse(a, &pivots, n, info, rep, _state);
59078     ae_frame_leave(_state);
59079 }
59080 
59081 
59082 /*************************************************************************
59083 Inversion of a symmetric positive definite matrix which is given
59084 by Cholesky decomposition.
59085 
59086 Input parameters:
59087     A       -   Cholesky decomposition of the matrix to be inverted:
59088                 A=U'*U or A = L*L'.
59089                 Output of  SPDMatrixCholesky subroutine.
59090     N       -   size of matrix A (optional) :
59091                 * if given, only principal NxN submatrix is processed  and
59092                   overwritten. other elements are unchanged.
59093                 * if not given,  size  is  automatically  determined  from
59094                   matrix size (A must be square matrix)
59095     IsUpper -   storage type (optional):
59096                 * if True, symmetric  matrix  A  is  given  by  its  upper
59097                   triangle, and the lower triangle isn't  used/changed  by
59098                   function
59099                 * if False,  symmetric matrix  A  is  given  by  its lower
59100                   triangle, and the  upper triangle isn't used/changed  by
59101                   function
59102                 * if not given, lower half is used.
59103 
59104 Output parameters:
59105     Info    -   return code, same as in RMatrixLUInverse
59106     Rep     -   solver report, same as in RMatrixLUInverse
59107     A       -   inverse of matrix A, same as in RMatrixLUInverse
59108 
59109   ! FREE EDITION OF ALGLIB:
59110   !
59111   ! Free Edition of ALGLIB supports following important features for  this
59112   ! function:
59113   ! * C++ version: x64 SIMD support using C++ intrinsics
59114   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59115   !
59116   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59117   ! Reference Manual in order  to  find  out  how to activate SIMD support
59118   ! in ALGLIB.
59119 
59120   ! COMMERCIAL EDITION OF ALGLIB:
59121   !
59122   ! Commercial Edition of ALGLIB includes following important improvements
59123   ! of this function:
59124   ! * high-performance native backend with same C# interface (C# version)
59125   ! * multithreading support (C++ and C# versions)
59126   ! * hardware vendor (Intel) implementations of linear algebra primitives
59127   !   (C++ and C# versions, x86/x64 platform)
59128   !
59129   ! We recommend you to read 'Working with commercial version' section  of
59130   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59131   ! related features provided by commercial edition of ALGLIB.
59132 
59133   -- ALGLIB routine --
59134      10.02.2010
59135      Bochkanov Sergey
59136 *************************************************************************/
void spdmatrixcholeskyinverse(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector tmp;
    ae_bool f;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);

    /*
     * NOTE: a local matinvreport (rep2) was previously declared and
     * initialized here but never used; it has been removed.
     */
    ae_assert(n>0, "SPDMatrixCholeskyInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "SPDMatrixCholeskyInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "SPDMatrixCholeskyInverse: rows(A)<N!", _state);
    *info = 1;

    /*
     * Finiteness check is performed on the diagonal only; off-diagonal
     * entries are validated implicitly by the condition-number estimate.
     */
    f = ae_true;
    for(i=0; i<=n-1; i++)
    {
        f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state);
    }
    ae_assert(f, "SPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state);

    /*
     * Calculate condition numbers; for an SPD matrix R1 and RInf coincide.
     */
    rep->r1 = spdmatrixcholeskyrcond(a, n, isupper, _state);
    rep->rinf = rep->r1;
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Matrix is (nearly) singular: zero the stored triangle and
         * report failure with Info=-3.
         */
        if( isupper )
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=i; j<=n-1; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i; j++)
                {
                    a->ptr.pp_double[i][j] = (double)(0);
                }
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Inverse
     */
    ae_vector_set_length(&tmp, n, _state);
    spdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state);
    ae_frame_leave(_state);
}
59211 
59212 
59213 /*************************************************************************
59214 Inversion of a symmetric positive definite matrix.
59215 
59216 Given an upper or lower triangle of a symmetric positive definite matrix,
59217 the algorithm generates matrix A^-1 and saves the upper or lower triangle
59218 depending on the input.
59219 
59220 Input parameters:
59221     A       -   matrix to be inverted (upper or lower triangle).
59222                 Array with elements [0..N-1,0..N-1].
59223     N       -   size of matrix A (optional) :
59224                 * if given, only principal NxN submatrix is processed  and
59225                   overwritten. other elements are unchanged.
59226                 * if not given,  size  is  automatically  determined  from
59227                   matrix size (A must be square matrix)
59228     IsUpper -   storage type (optional):
59229                 * if True, symmetric  matrix  A  is  given  by  its  upper
59230                   triangle, and the lower triangle isn't  used/changed  by
59231                   function
59232                 * if False,  symmetric matrix  A  is  given  by  its lower
59233                   triangle, and the  upper triangle isn't used/changed  by
59234                   function
59235                 * if not given,  both lower and upper  triangles  must  be
59236                   filled.
59237 
59238 Output parameters:
59239     Info    -   return code, same as in RMatrixLUInverse
59240     Rep     -   solver report, same as in RMatrixLUInverse
59241     A       -   inverse of matrix A, same as in RMatrixLUInverse
59242 
59243   ! FREE EDITION OF ALGLIB:
59244   !
59245   ! Free Edition of ALGLIB supports following important features for  this
59246   ! function:
59247   ! * C++ version: x64 SIMD support using C++ intrinsics
59248   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59249   !
59250   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59251   ! Reference Manual in order  to  find  out  how to activate SIMD support
59252   ! in ALGLIB.
59253 
59254   ! COMMERCIAL EDITION OF ALGLIB:
59255   !
59256   ! Commercial Edition of ALGLIB includes following important improvements
59257   ! of this function:
59258   ! * high-performance native backend with same C# interface (C# version)
59259   ! * multithreading support (C++ and C# versions)
59260   ! * hardware vendor (Intel) implementations of linear algebra primitives
59261   !   (C++ and C# versions, x86/x64 platform)
59262   !
59263   ! We recommend you to read 'Working with commercial version' section  of
59264   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59265   ! related features provided by commercial edition of ALGLIB.
59266 
59267   -- ALGLIB routine --
59268      10.02.2010
59269      Bochkanov Sergey
59270 *************************************************************************/
void spdmatrixinverse(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{

    *info = 0;
    _matinvreport_clear(rep);

    /*
     * Validate inputs, then factor A with Cholesky and invert the factor.
     * A failed factorization means A is not positive definite: Info=-3.
     */
    ae_assert(n>0, "SPDMatrixInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "SPDMatrixInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "SPDMatrixInverse: rows(A)<N!", _state);
    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    if( !spdmatrixcholesky(a, n, isupper, _state) )
    {
        *info = -3;
        return;
    }
    spdmatrixcholeskyinverse(a, n, isupper, info, rep, _state);
}
59296 
59297 
59298 /*************************************************************************
59299 Inversion of a Hermitian positive definite matrix which is given
59300 by Cholesky decomposition.
59301 
59302 Input parameters:
59303     A       -   Cholesky decomposition of the matrix to be inverted:
59304                 A=U'*U or A = L*L'.
59305                 Output of  HPDMatrixCholesky subroutine.
59306     N       -   size of matrix A (optional) :
59307                 * if given, only principal NxN submatrix is processed  and
59308                   overwritten. other elements are unchanged.
59309                 * if not given,  size  is  automatically  determined  from
59310                   matrix size (A must be square matrix)
59311     IsUpper -   storage type (optional):
59312                 * if True, symmetric  matrix  A  is  given  by  its  upper
59313                   triangle, and the lower triangle isn't  used/changed  by
59314                   function
59315                 * if False,  symmetric matrix  A  is  given  by  its lower
59316                   triangle, and the  upper triangle isn't used/changed  by
59317                   function
59318                 * if not given, lower half is used.
59319 
59320 Output parameters:
59321     Info    -   return code, same as in RMatrixLUInverse
59322     Rep     -   solver report, same as in RMatrixLUInverse
59323     A       -   inverse of matrix A, same as in RMatrixLUInverse
59324 
59325   ! FREE EDITION OF ALGLIB:
59326   !
59327   ! Free Edition of ALGLIB supports following important features for  this
59328   ! function:
59329   ! * C++ version: x64 SIMD support using C++ intrinsics
59330   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59331   !
59332   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59333   ! Reference Manual in order  to  find  out  how to activate SIMD support
59334   ! in ALGLIB.
59335 
59336   ! COMMERCIAL EDITION OF ALGLIB:
59337   !
59338   ! Commercial Edition of ALGLIB includes following important improvements
59339   ! of this function:
59340   ! * high-performance native backend with same C# interface (C# version)
59341   ! * multithreading support (C++ and C# versions)
59342   ! * hardware vendor (Intel) implementations of linear algebra primitives
59343   !   (C++ and C# versions, x86/x64 platform)
59344   !
59345   ! We recommend you to read 'Working with commercial version' section  of
59346   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59347   ! related features provided by commercial edition of ALGLIB.
59348 
59349   -- ALGLIB routine --
59350      10.02.2010
59351      Bochkanov Sergey
59352 *************************************************************************/
void hpdmatrixcholeskyinverse(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t i;
    ae_int_t j;
    ae_vector tmp;
    ae_bool f;

    ae_frame_make(_state, &_frame_block);
    memset(&tmp, 0, sizeof(tmp));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);

    /*
     * NOTE: a local matinvreport (rep2) was previously declared and
     * initialized here but never used; it has been removed.
     */
    ae_assert(n>0, "HPDMatrixCholeskyInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "HPDMatrixCholeskyInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "HPDMatrixCholeskyInverse: rows(A)<N!", _state);

    /*
     * Finiteness check on the diagonal (real and imaginary parts) only;
     * off-diagonal entries are validated implicitly by the rcond estimate.
     */
    f = ae_true;
    for(i=0; i<=n-1; i++)
    {
        f = (f&&ae_isfinite(a->ptr.pp_complex[i][i].x, _state))&&ae_isfinite(a->ptr.pp_complex[i][i].y, _state);
    }
    ae_assert(f, "HPDMatrixCholeskyInverse: A contains infinite or NaN values!", _state);
    *info = 1;

    /*
     * Calculate condition numbers; for an HPD matrix R1 and RInf coincide.
     */
    rep->r1 = hpdmatrixcholeskyrcond(a, n, isupper, _state);
    rep->rinf = rep->r1;
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Matrix is (nearly) singular: zero the stored triangle and
         * report failure with Info=-3.
         */
        if( isupper )
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=i; j<=n-1; j++)
                {
                    a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
                }
            }
        }
        else
        {
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i; j++)
                {
                    a->ptr.pp_complex[i][j] = ae_complex_from_i(0);
                }
            }
        }
        rep->r1 = (double)(0);
        rep->rinf = (double)(0);
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Inverse
     */
    ae_vector_set_length(&tmp, n, _state);
    matinv_hpdmatrixcholeskyinverserec(a, 0, n, isupper, &tmp, _state);
    ae_frame_leave(_state);
}
59427 
59428 
59429 /*************************************************************************
59430 Inversion of a Hermitian positive definite matrix.
59431 
59432 Given an upper or lower triangle of a Hermitian positive definite matrix,
59433 the algorithm generates matrix A^-1 and saves the upper or lower triangle
59434 depending on the input.
59435 
59436 Input parameters:
59437     A       -   matrix to be inverted (upper or lower triangle).
59438                 Array with elements [0..N-1,0..N-1].
59439     N       -   size of matrix A (optional) :
59440                 * if given, only principal NxN submatrix is processed  and
59441                   overwritten. other elements are unchanged.
59442                 * if not given,  size  is  automatically  determined  from
59443                   matrix size (A must be square matrix)
59444     IsUpper -   storage type (optional):
59445                 * if True, symmetric  matrix  A  is  given  by  its  upper
59446                   triangle, and the lower triangle isn't  used/changed  by
59447                   function
59448                 * if False,  symmetric matrix  A  is  given  by  its lower
59449                   triangle, and the  upper triangle isn't used/changed  by
59450                   function
59451                 * if not given,  both lower and upper  triangles  must  be
59452                   filled.
59453 
59454 Output parameters:
59455     Info    -   return code, same as in RMatrixLUInverse
59456     Rep     -   solver report, same as in RMatrixLUInverse
59457     A       -   inverse of matrix A, same as in RMatrixLUInverse
59458 
59459   ! FREE EDITION OF ALGLIB:
59460   !
59461   ! Free Edition of ALGLIB supports following important features for  this
59462   ! function:
59463   ! * C++ version: x64 SIMD support using C++ intrinsics
59464   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59465   !
59466   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59467   ! Reference Manual in order  to  find  out  how to activate SIMD support
59468   ! in ALGLIB.
59469 
59470   ! COMMERCIAL EDITION OF ALGLIB:
59471   !
59472   ! Commercial Edition of ALGLIB includes following important improvements
59473   ! of this function:
59474   ! * high-performance native backend with same C# interface (C# version)
59475   ! * multithreading support (C++ and C# versions)
59476   ! * hardware vendor (Intel) implementations of linear algebra primitives
59477   !   (C++ and C# versions, x86/x64 platform)
59478   !
59479   ! We recommend you to read 'Working with commercial version' section  of
59480   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59481   ! related features provided by commercial edition of ALGLIB.
59482 
59483   -- ALGLIB routine --
59484      10.02.2010
59485      Bochkanov Sergey
59486 *************************************************************************/
void hpdmatrixinverse(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{

    *info = 0;
    _matinvreport_clear(rep);

    /*
     * Validate inputs, then factor A with Cholesky and invert the factor.
     * A failed factorization means A is not positive definite: Info=-3.
     */
    ae_assert(n>0, "HPDMatrixInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "HPDMatrixInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "HPDMatrixInverse: rows(A)<N!", _state);
    ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "HPDMatrixInverse: A contains infinite or NaN values!", _state);
    *info = 1;
    if( !hpdmatrixcholesky(a, n, isupper, _state) )
    {
        *info = -3;
        return;
    }
    hpdmatrixcholeskyinverse(a, n, isupper, info, rep, _state);
}
59512 
59513 
59514 /*************************************************************************
59515 Triangular matrix inverse (real)
59516 
59517 The subroutine inverts the following types of matrices:
59518     * upper triangular
59519     * upper triangular with unit diagonal
59520     * lower triangular
59521     * lower triangular with unit diagonal
59522 
59523 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
59524 also be upper (lower) triangular, and after the end of the algorithm,  the
59525 inverse matrix replaces the source matrix. The elements  below (above) the
59526 main diagonal are not changed by the algorithm.
59527 
59528 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
59529 diagonal, and the diagonal elements are not passed to the algorithm.
59530 
59531 Input parameters:
59532     A       -   matrix, array[0..N-1, 0..N-1].
59533     N       -   size of matrix A (optional) :
59534                 * if given, only principal NxN submatrix is processed  and
59535                   overwritten. other elements are unchanged.
59536                 * if not given,  size  is  automatically  determined  from
59537                   matrix size (A must be square matrix)
59538     IsUpper -   True, if the matrix is upper triangular.
59539     IsUnit  -   diagonal type (optional):
59540                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
59541                 * if False, matrix diagonal is arbitrary
59542                 * if not given, False is assumed
59543 
59544 Output parameters:
59545     Info    -   same as for RMatrixLUInverse
59546     Rep     -   same as for RMatrixLUInverse
59547     A       -   same as for RMatrixLUInverse.
59548 
59549   ! FREE EDITION OF ALGLIB:
59550   !
59551   ! Free Edition of ALGLIB supports following important features for  this
59552   ! function:
59553   ! * C++ version: x64 SIMD support using C++ intrinsics
59554   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59555   !
59556   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59557   ! Reference Manual in order  to  find  out  how to activate SIMD support
59558   ! in ALGLIB.
59559 
59560   ! COMMERCIAL EDITION OF ALGLIB:
59561   !
59562   ! Commercial Edition of ALGLIB includes following important improvements
59563   ! of this function:
59564   ! * high-performance native backend with same C# interface (C# version)
59565   ! * multithreading support (C++ and C# versions)
59566   ! * hardware vendor (Intel) implementations of linear algebra primitives
59567   !   (C++ and C# versions, x86/x64 platform)
59568   !
59569   ! We recommend you to read 'Working with commercial version' section  of
59570   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59571   ! related features provided by commercial edition of ALGLIB.
59572 
59573   -- ALGLIB --
59574      Copyright 05.02.2010 by Bochkanov Sergey
59575 *************************************************************************/
void rmatrixtrinverse(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    ae_vector work;
    sinteger localinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&localinfo, 0, sizeof(localinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
    _sinteger_init(&localinfo, _state, ae_true);

    ae_assert(n>0, "RMatrixTRInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "RMatrixTRInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "RMatrixTRInverse: rows(A)<N!", _state);
    ae_assert(isfinitertrmatrix(a, n, isupper, _state), "RMatrixTRInverse: A contains infinite or NaN values!", _state);

    /*
     * Estimate condition numbers first; a badly conditioned matrix is
     * rejected before any inversion work is done.
     */
    rep->r1 = rmatrixtrrcond1(a, n, isupper, isunit, _state);
    rep->rinf = rmatrixtrrcondinf(a, n, isupper, isunit, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Near-singular input: zero the whole NxN submatrix, zero the
         * condition estimates and report Info=-3.
         */
        for(row=0; row<n; row++)
        {
            for(col=0; col<n; col++)
            {
                a->ptr.pp_double[row][col] = 0.0;
            }
        }
        rep->r1 = 0.0;
        rep->rinf = 0.0;
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Well-conditioned: invert in-place via the recursive kernel and
     * propagate its status code to the caller.
     */
    ae_vector_set_length(&work, n, _state);
    localinfo.val = 1;
    matinv_rmatrixtrinverserec(a, 0, n, isupper, isunit, &work, &localinfo, _state);
    *info = localinfo.val;
    ae_frame_leave(_state);
}
59633 
59634 
59635 /*************************************************************************
59636 Triangular matrix inverse (complex)
59637 
59638 The subroutine inverts the following types of matrices:
59639     * upper triangular
59640     * upper triangular with unit diagonal
59641     * lower triangular
59642     * lower triangular with unit diagonal
59643 
59644 In case of an upper (lower) triangular matrix,  the  inverse  matrix  will
59645 also be upper (lower) triangular, and after the end of the algorithm,  the
59646 inverse matrix replaces the source matrix. The elements  below (above) the
59647 main diagonal are not changed by the algorithm.
59648 
59649 If  the matrix  has a unit diagonal, the inverse matrix also  has  a  unit
59650 diagonal, and the diagonal elements are not passed to the algorithm.
59651 
59652 Input parameters:
59653     A       -   matrix, array[0..N-1, 0..N-1].
59654     N       -   size of matrix A (optional) :
59655                 * if given, only principal NxN submatrix is processed  and
59656                   overwritten. other elements are unchanged.
59657                 * if not given,  size  is  automatically  determined  from
59658                   matrix size (A must be square matrix)
59659     IsUpper -   True, if the matrix is upper triangular.
59660     IsUnit  -   diagonal type (optional):
59661                 * if True, matrix has unit diagonal (a[i,i] are NOT used)
59662                 * if False, matrix diagonal is arbitrary
59663                 * if not given, False is assumed
59664 
59665 Output parameters:
59666     Info    -   same as for RMatrixLUInverse
59667     Rep     -   same as for RMatrixLUInverse
59668     A       -   same as for RMatrixLUInverse.
59669 
59670   ! FREE EDITION OF ALGLIB:
59671   !
59672   ! Free Edition of ALGLIB supports following important features for  this
59673   ! function:
59674   ! * C++ version: x64 SIMD support using C++ intrinsics
59675   ! * C#  version: x64 SIMD support using NET5/NetCore hardware intrinsics
59676   !
59677   ! We  recommend  you  to  read  'Compiling ALGLIB' section of the ALGLIB
59678   ! Reference Manual in order  to  find  out  how to activate SIMD support
59679   ! in ALGLIB.
59680 
59681   ! COMMERCIAL EDITION OF ALGLIB:
59682   !
59683   ! Commercial Edition of ALGLIB includes following important improvements
59684   ! of this function:
59685   ! * high-performance native backend with same C# interface (C# version)
59686   ! * multithreading support (C++ and C# versions)
59687   ! * hardware vendor (Intel) implementations of linear algebra primitives
59688   !   (C++ and C# versions, x86/x64 platform)
59689   !
59690   ! We recommend you to read 'Working with commercial version' section  of
59691   ! ALGLIB Reference Manual in order to find out how to  use  performance-
59692   ! related features provided by commercial edition of ALGLIB.
59693 
59694   -- ALGLIB --
59695      Copyright 05.02.2010 by Bochkanov Sergey
59696 *************************************************************************/
void cmatrixtrinverse(/* Complex */ ae_matrix* a,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_int_t row;
    ae_int_t col;
    ae_vector work;
    sinteger localinfo;

    ae_frame_make(_state, &_frame_block);
    memset(&work, 0, sizeof(work));
    memset(&localinfo, 0, sizeof(localinfo));
    *info = 0;
    _matinvreport_clear(rep);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_true);
    _sinteger_init(&localinfo, _state, ae_true);

    ae_assert(n>0, "CMatrixTRInverse: N<=0!", _state);
    ae_assert(a->cols>=n, "CMatrixTRInverse: cols(A)<N!", _state);
    ae_assert(a->rows>=n, "CMatrixTRInverse: rows(A)<N!", _state);
    ae_assert(apservisfinitectrmatrix(a, n, isupper, _state), "CMatrixTRInverse: A contains infinite or NaN values!", _state);

    /*
     * Estimate condition numbers first; a badly conditioned matrix is
     * rejected before any inversion work is done.
     */
    rep->r1 = cmatrixtrrcond1(a, n, isupper, isunit, _state);
    rep->rinf = cmatrixtrrcondinf(a, n, isupper, isunit, _state);
    if( ae_fp_less(rep->r1,rcondthreshold(_state))||ae_fp_less(rep->rinf,rcondthreshold(_state)) )
    {

        /*
         * Near-singular input: zero the whole NxN submatrix, zero the
         * condition estimates and report Info=-3.
         */
        for(row=0; row<n; row++)
        {
            for(col=0; col<n; col++)
            {
                a->ptr.pp_complex[row][col] = ae_complex_from_i(0);
            }
        }
        rep->r1 = 0.0;
        rep->rinf = 0.0;
        *info = -3;
        ae_frame_leave(_state);
        return;
    }

    /*
     * Well-conditioned: invert in-place via the recursive kernel and
     * propagate its status code to the caller.
     */
    ae_vector_set_length(&work, n, _state);
    localinfo.val = 1;
    matinv_cmatrixtrinverserec(a, 0, n, isupper, isunit, &work, &localinfo, _state);
    *info = localinfo.val;
    ae_frame_leave(_state);
}
59754 
59755 
59756 /*************************************************************************
59757 Recursive subroutine for SPD inversion.
59758 
59759 NOTE: this function expects that matris is strictly positive-definite.
59760 
59761   -- ALGLIB routine --
59762      10.02.2010
59763      Bochkanov Sergey
59764 *************************************************************************/
spdmatrixcholeskyinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)59765 void spdmatrixcholeskyinverserec(/* Real    */ ae_matrix* a,
59766      ae_int_t offs,
59767      ae_int_t n,
59768      ae_bool isupper,
59769      /* Real    */ ae_vector* tmp,
59770      ae_state *_state)
59771 {
59772     ae_frame _frame_block;
59773     ae_int_t i;
59774     ae_int_t j;
59775     double v;
59776     ae_int_t n1;
59777     ae_int_t n2;
59778     sinteger sinfo2;
59779     ae_int_t tsa;
59780     ae_int_t tsb;
59781     ae_int_t tscur;
59782 
59783     ae_frame_make(_state, &_frame_block);
59784     memset(&sinfo2, 0, sizeof(sinfo2));
59785     _sinteger_init(&sinfo2, _state, ae_true);
59786 
59787     if( n<1 )
59788     {
59789         ae_frame_leave(_state);
59790         return;
59791     }
59792     tsa = matrixtilesizea(_state);
59793     tsb = matrixtilesizeb(_state);
59794     tscur = tsb;
59795     if( n<=tsb )
59796     {
59797         tscur = tsa;
59798     }
59799 
59800     /*
59801      * Base case
59802      */
59803     if( n<=tsa )
59804     {
59805         sinfo2.val = 1;
59806         matinv_rmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &sinfo2, _state);
59807         ae_assert(sinfo2.val>0, "SPDMatrixCholeskyInverseRec: integrity check failed", _state);
59808         if( isupper )
59809         {
59810 
59811             /*
59812              * Compute the product U * U'.
59813              * NOTE: we never assume that diagonal of U is real
59814              */
59815             for(i=0; i<=n-1; i++)
59816             {
59817                 if( i==0 )
59818                 {
59819 
59820                     /*
59821                      * 1x1 matrix
59822                      */
59823                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59824                 }
59825                 else
59826                 {
59827 
59828                     /*
59829                      * (I+1)x(I+1) matrix,
59830                      *
59831                      * ( A11  A12 )   ( A11^H        )   ( A11*A11^H+A12*A12^H  A12*A22^H )
59832                      * (          ) * (              ) = (                                )
59833                      * (      A22 )   ( A12^H  A22^H )   ( A22*A12^H            A22*A22^H )
59834                      *
59835                      * A11 is IxI, A22 is 1x1.
59836                      */
59837                     ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(0,i-1));
59838                     for(j=0; j<=i-1; j++)
59839                     {
59840                         v = a->ptr.pp_double[offs+j][offs+i];
59841                         ae_v_addd(&a->ptr.pp_double[offs+j][offs+j], 1, &tmp->ptr.p_double[j], 1, ae_v_len(offs+j,offs+i-1), v);
59842                     }
59843                     v = a->ptr.pp_double[offs+i][offs+i];
59844                     ae_v_muld(&a->ptr.pp_double[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v);
59845                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59846                 }
59847             }
59848         }
59849         else
59850         {
59851 
59852             /*
59853              * Compute the product L' * L
59854              * NOTE: we never assume that diagonal of L is real
59855              */
59856             for(i=0; i<=n-1; i++)
59857             {
59858                 if( i==0 )
59859                 {
59860 
59861                     /*
59862                      * 1x1 matrix
59863                      */
59864                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59865                 }
59866                 else
59867                 {
59868 
59869                     /*
59870                      * (I+1)x(I+1) matrix,
59871                      *
59872                      * ( A11^H  A21^H )   ( A11      )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
59873                      * (              ) * (          ) = (                                )
59874                      * (        A22^H )   ( A21  A22 )   ( A22^H*A21            A22^H*A22 )
59875                      *
59876                      * A11 is IxI, A22 is 1x1.
59877                      */
59878                     ae_v_move(&tmp->ptr.p_double[0], 1, &a->ptr.pp_double[offs+i][offs], 1, ae_v_len(0,i-1));
59879                     for(j=0; j<=i-1; j++)
59880                     {
59881                         v = a->ptr.pp_double[offs+i][offs+j];
59882                         ae_v_addd(&a->ptr.pp_double[offs+j][offs], 1, &tmp->ptr.p_double[0], 1, ae_v_len(offs,offs+j), v);
59883                     }
59884                     v = a->ptr.pp_double[offs+i][offs+i];
59885                     ae_v_muld(&a->ptr.pp_double[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v);
59886                     a->ptr.pp_double[offs+i][offs+i] = ae_sqr(a->ptr.pp_double[offs+i][offs+i], _state);
59887                 }
59888             }
59889         }
59890         ae_frame_leave(_state);
59891         return;
59892     }
59893 
59894     /*
59895      * Recursive code: triangular factor inversion merged with
59896      * UU' or L'L multiplication
59897      */
59898     tiledsplit(n, tscur, &n1, &n2, _state);
59899 
59900     /*
59901      * form off-diagonal block of trangular inverse
59902      */
59903     if( isupper )
59904     {
59905         for(i=0; i<=n1-1; i++)
59906         {
59907             ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
59908         }
59909         rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state);
59910         rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state);
59911     }
59912     else
59913     {
59914         for(i=0; i<=n2-1; i++)
59915         {
59916             ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
59917         }
59918         rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state);
59919         rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state);
59920     }
59921 
59922     /*
59923      * invert first diagonal block
59924      */
59925     spdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state);
59926 
59927     /*
59928      * update first diagonal block with off-diagonal block,
59929      * update off-diagonal block
59930      */
59931     if( isupper )
59932     {
59933         rmatrixsyrk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state);
59934         rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs, offs+n1, _state);
59935     }
59936     else
59937     {
59938         rmatrixsyrk(n1, n2, 1.0, a, offs+n1, offs, 1, 1.0, a, offs, offs, isupper, _state);
59939         rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 1, a, offs+n1, offs, _state);
59940     }
59941 
59942     /*
59943      * invert second diagonal block
59944      */
59945     spdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state);
59946     ae_frame_leave(_state);
59947 }
59948 
59949 
59950 /*************************************************************************
59951 Serial stub for GPL edition.
59952 *************************************************************************/
_trypexec_spdmatrixcholeskyinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)59953 ae_bool _trypexec_spdmatrixcholeskyinverserec(/* Real    */ ae_matrix* a,
59954     ae_int_t offs,
59955     ae_int_t n,
59956     ae_bool isupper,
59957     /* Real    */ ae_vector* tmp,
59958     ae_state *_state)
59959 {
59960     return ae_false;
59961 }
59962 
59963 
/*************************************************************************
Triangular matrix inversion, recursive subroutine

Inverts, in-place, the N*N triangular submatrix of A whose top-left corner
is located at [Offs,Offs].

NOTE: this function sets Info on failure, leaves it unchanged on success.

NOTE: only Tmp[Offs:Offs+N-1] is modified, other entries of the temporary array are not modified

INPUT PARAMETERS:
    A       -   matrix holding the triangular factor; overwritten by its
                inverse on success
    Offs    -   offset of the submatrix inside A
    N       -   submatrix size; N<1 sets Info to -1 and returns immediately
    IsUpper -   ae_true for an upper triangular factor, ae_false for lower
    IsUnit  -   ae_true if the diagonal is implicitly unit; in that case the
                diagonal is neither tested for zero nor overwritten
    Tmp     -   scratch vector; only elements [Offs..Offs+N-1] are touched
    Info    -   failure codes: -1 when N<1, -3 when an exactly-zero diagonal
                element is encountered (singular factor)

  -- ALGLIB --
     05.02.2010, Bochkanov Sergey.
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     February 29, 1992.
*************************************************************************/
static void matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     /* Real    */ ae_vector* tmp,
     sinteger* info,
     ae_state *_state)
{
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t mn;
    ae_int_t i;
    ae_int_t j;
    double v;
    double ajj;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;


    if( n<1 )
    {
        info->val = -1;
        return;
    }

    /*
     * Tile size selection: use the larger B-tile while the problem is big,
     * switch to the smaller A-tile once N already fits into a B-tile.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( n<=tsb )
    {
        tscur = tsa;
    }

    /*
     * Try to activate parallelism
     * (cost estimate ~N^3/3 flops must exceed the SMP activation level)
     */
    if( n>=2*tsb&&ae_fp_greater_eq(rmul3((double)(n), (double)(n), (double)(n), _state)*((double)1/(double)3),smpactivationlevel(_state)) )
    {
        if( _trypexec_matinv_rmatrixtrinverserec(a,offs,n,isupper,isunit,tmp,info, _state) )
        {
            return;
        }
    }

    /*
     * Base case: column-by-column unblocked inversion (LAPACK xTRTI2 style)
     */
    if( n<=tsa )
    {
        if( isupper )
        {

            /*
             * Compute inverse of upper triangular matrix.
             */
            for(j=0; j<=n-1; j++)
            {
                if( !isunit )
                {
                    /* exactly-zero diagonal means singular factor */
                    if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j];
                    ajj = -a->ptr.pp_double[offs+j][offs+j];
                }
                else
                {
                    ajj = (double)(-1);
                }

                /*
                 * Compute elements 1:j-1 of j-th column.
                 * Column j is first copied into Tmp, then overwritten with
                 * (already inverted leading block) * (old column), and
                 * finally scaled by -1/A[j][j] (or -1 for unit diagonal).
                 */
                if( j>0 )
                {
                    ae_v_move(&tmp->ptr.p_double[offs+0], 1, &a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1));
                    for(i=0; i<=j-1; i++)
                    {
                        if( i<j-1 )
                        {
                            v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+i+1], 1, &tmp->ptr.p_double[offs+i+1], 1, ae_v_len(offs+i+1,offs+j-1));
                        }
                        else
                        {
                            v = (double)(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[offs+i];
                        }
                        else
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[offs+i];
                        }
                    }
                    ae_v_muld(&a->ptr.pp_double[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj);
                }
            }
        }
        else
        {

            /*
             * Compute inverse of lower triangular matrix.
             * Mirror image of the upper case: columns are processed from
             * right to left and subdiagonal elements are updated.
             */
            for(j=n-1; j>=0; j--)
            {
                if( !isunit )
                {
                    if( ae_fp_eq(a->ptr.pp_double[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_double[offs+j][offs+j] = 1/a->ptr.pp_double[offs+j][offs+j];
                    ajj = -a->ptr.pp_double[offs+j][offs+j];
                }
                else
                {
                    ajj = (double)(-1);
                }
                if( j<n-1 )
                {

                    /*
                     * Compute elements j+1:n of j-th column.
                     */
                    ae_v_move(&tmp->ptr.p_double[offs+j+1], 1, &a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1));
                    for(i=j+1; i<=n-1; i++)
                    {
                        if( i>j+1 )
                        {
                            v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &tmp->ptr.p_double[offs+j+1], 1, ae_v_len(offs+j+1,offs+i-1));
                        }
                        else
                        {
                            v = (double)(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+a->ptr.pp_double[offs+i][offs+i]*tmp->ptr.p_double[offs+i];
                        }
                        else
                        {
                            a->ptr.pp_double[offs+i][offs+j] = v+tmp->ptr.p_double[offs+i];
                        }
                    }
                    ae_v_muld(&a->ptr.pp_double[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj);
                }
            }
        }
        return;
    }

    /*
     * Recursive case: split into N1 x N1 and N2 x N2 diagonal blocks.
     * The off-diagonal block of the inverse is -inv(T1)*T12*inv(T2)
     * (for the upper case), formed with two triangular solves against the
     * not-yet-inverted diagonal blocks, interleaved with the recursion
     * that inverts the second block.
     */
    tiledsplit(n, tscur, &n1, &n2, _state);
    /* MN is not used below; touchint() marks it as intentionally unused */
    mn = imin2(n1, n2, _state);
    touchint(&mn, _state);
    if( n2>0 )
    {
        if( isupper )
        {
            /* negate the off-diagonal block in-place before the solves */
            for(i=0; i<=n1-1; i++)
            {
                ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
            }
            rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state);
            matinv_rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            rmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state);
        }
        else
        {
            for(i=0; i<=n2-1; i++)
            {
                ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
            }
            rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state);
            matinv_rmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            rmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state);
        }
    }
    /* finally invert the first diagonal block */
    matinv_rmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, _state);
}
60165 
60166 
60167 /*************************************************************************
60168 Serial stub for GPL edition.
60169 *************************************************************************/
_trypexec_matinv_rmatrixtrinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_vector * tmp,sinteger * info,ae_state * _state)60170 ae_bool _trypexec_matinv_rmatrixtrinverserec(/* Real    */ ae_matrix* a,
60171     ae_int_t offs,
60172     ae_int_t n,
60173     ae_bool isupper,
60174     ae_bool isunit,
60175     /* Real    */ ae_vector* tmp,
60176     sinteger* info,
60177     ae_state *_state)
60178 {
60179     return ae_false;
60180 }
60181 
60182 
/*************************************************************************
Triangular matrix inversion, recursive subroutine (complex version).

Inverts, in-place, the N*N triangular submatrix of A whose top-left corner
is located at [Offs,Offs].

Info is modified on failure, left unchanged on success.

INPUT PARAMETERS:
    A       -   complex matrix holding the triangular factor; overwritten by
                its inverse on success
    Offs    -   offset of the submatrix inside A
    N       -   submatrix size; N<1 sets Info to -1 and returns immediately
    IsUpper -   ae_true for an upper triangular factor, ae_false for lower
    IsUnit  -   ae_true if the diagonal is implicitly unit; in that case the
                diagonal is neither tested for zero nor overwritten
    Tmp     -   complex scratch vector; elements [Offs..Offs+N-1] are used
    Info    -   failure codes: -1 when N<1, -3 when an exactly-zero diagonal
                element is encountered (singular factor)

  -- ALGLIB --
     05.02.2010, Bochkanov Sergey.
     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
     Courant Institute, Argonne National Lab, and Rice University
     February 29, 1992.
*************************************************************************/
static void matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t n,
     ae_bool isupper,
     ae_bool isunit,
     /* Complex */ ae_vector* tmp,
     sinteger* info,
     ae_state *_state)
{
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t i;
    ae_int_t j;
    ae_complex v;
    ae_complex ajj;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;
    ae_int_t mn;


    if( n<1 )
    {
        info->val = -1;
        return;
    }

    /*
     * Tile size selection; the A-tile is halved relative to the real
     * version (presumably to account for complex element size — see the
     * real-valued routine for comparison).
     */
    tsa = matrixtilesizea(_state)/2;
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( n<=tsb )
    {
        tscur = tsa;
    }

    /*
     * Try to activate parallelism
     * (cost estimate ~4*N^3/3 flops must exceed the SMP activation level)
     */
    if( n>=2*tsb&&ae_fp_greater_eq(rmul3((double)(n), (double)(n), (double)(n), _state)*((double)4/(double)3),smpactivationlevel(_state)) )
    {
        if( _trypexec_matinv_cmatrixtrinverserec(a,offs,n,isupper,isunit,tmp,info, _state) )
        {
            return;
        }
    }

    /*
     * Base case: column-by-column unblocked inversion (LAPACK xTRTI2 style)
     */
    if( n<=tsa )
    {
        if( isupper )
        {

            /*
             * Compute inverse of upper triangular matrix.
             */
            for(j=0; j<=n-1; j++)
            {
                if( !isunit )
                {
                    /* exactly-zero diagonal means singular factor */
                    if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
                    ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]);
                }
                else
                {
                    ajj = ae_complex_from_i(-1);
                }

                /*
                 * Compute elements 1:j-1 of j-th column.
                 * Column j is copied into Tmp, multiplied by the already
                 * inverted leading block, then scaled by -1/A[j][j]
                 * (or -1 for unit diagonal).
                 */
                if( j>0 )
                {
                    ae_v_cmove(&tmp->ptr.p_complex[offs+0], 1, &a->ptr.pp_complex[offs+0][offs+j], a->stride, "N", ae_v_len(offs+0,offs+j-1));
                    for(i=0; i<=j-1; i++)
                    {
                        if( i<j-1 )
                        {
                            v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+i+1], 1, "N", &tmp->ptr.p_complex[offs+i+1], 1, "N", ae_v_len(offs+i+1,offs+j-1));
                        }
                        else
                        {
                            v = ae_complex_from_i(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[offs+i]));
                        }
                        else
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[offs+i]);
                        }
                    }
                    ae_v_cmulc(&a->ptr.pp_complex[offs+0][offs+j], a->stride, ae_v_len(offs+0,offs+j-1), ajj);
                }
            }
        }
        else
        {

            /*
             * Compute inverse of lower triangular matrix.
             * Mirror image of the upper case: columns processed from
             * right to left, subdiagonal elements updated.
             */
            for(j=n-1; j>=0; j--)
            {
                if( !isunit )
                {
                    if( ae_c_eq_d(a->ptr.pp_complex[offs+j][offs+j],(double)(0)) )
                    {
                        info->val = -3;
                        return;
                    }
                    a->ptr.pp_complex[offs+j][offs+j] = ae_c_d_div(1,a->ptr.pp_complex[offs+j][offs+j]);
                    ajj = ae_c_neg(a->ptr.pp_complex[offs+j][offs+j]);
                }
                else
                {
                    ajj = ae_complex_from_i(-1);
                }
                if( j<n-1 )
                {

                    /*
                     * Compute elements j+1:n of j-th column.
                     */
                    ae_v_cmove(&tmp->ptr.p_complex[offs+j+1], 1, &a->ptr.pp_complex[offs+j+1][offs+j], a->stride, "N", ae_v_len(offs+j+1,offs+n-1));
                    for(i=j+1; i<=n-1; i++)
                    {
                        if( i>j+1 )
                        {
                            v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &tmp->ptr.p_complex[offs+j+1], 1, "N", ae_v_len(offs+j+1,offs+i-1));
                        }
                        else
                        {
                            v = ae_complex_from_i(0);
                        }
                        if( !isunit )
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,ae_c_mul(a->ptr.pp_complex[offs+i][offs+i],tmp->ptr.p_complex[offs+i]));
                        }
                        else
                        {
                            a->ptr.pp_complex[offs+i][offs+j] = ae_c_add(v,tmp->ptr.p_complex[offs+i]);
                        }
                    }
                    ae_v_cmulc(&a->ptr.pp_complex[offs+j+1][offs+j], a->stride, ae_v_len(offs+j+1,offs+n-1), ajj);
                }
            }
        }
        return;
    }

    /*
     * Recursive case: split into N1 x N1 and N2 x N2 diagonal blocks,
     * negate the off-diagonal block, run two triangular solves around the
     * recursion on the second block, then invert the first block.
     */
    tiledsplit(n, tscur, &n1, &n2, _state);
    /* MN is not used below; touchint() marks it as intentionally unused */
    mn = imin2(n1, n2, _state);
    touchint(&mn, _state);
    if( n2>0 )
    {
        if( isupper )
        {
            for(i=0; i<=n1-1; i++)
            {
                ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
            }
            cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs, offs+n1, _state);
            matinv_cmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, isunit, 0, a, offs, offs+n1, _state);
        }
        else
        {
            for(i=0; i<=n2-1; i++)
            {
                ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
            }
            cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, isunit, 0, a, offs+n1, offs, _state);
            matinv_cmatrixtrinverserec(a, offs+n1, n2, isupper, isunit, tmp, info, _state);
            cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, isunit, 0, a, offs+n1, offs, _state);
        }
    }
    matinv_cmatrixtrinverserec(a, offs, n1, isupper, isunit, tmp, info, _state);
}
60382 
60383 
60384 /*************************************************************************
60385 Serial stub for GPL edition.
60386 *************************************************************************/
_trypexec_matinv_cmatrixtrinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_bool isunit,ae_vector * tmp,sinteger * info,ae_state * _state)60387 ae_bool _trypexec_matinv_cmatrixtrinverserec(/* Complex */ ae_matrix* a,
60388     ae_int_t offs,
60389     ae_int_t n,
60390     ae_bool isupper,
60391     ae_bool isunit,
60392     /* Complex */ ae_vector* tmp,
60393     sinteger* info,
60394     ae_state *_state)
60395 {
60396     return ae_false;
60397 }
60398 
60399 
/*************************************************************************
Recursive kernel for matrix inversion from an LU decomposition (real
version).

The N*N submatrix of A at [Offs,Offs] is assumed to contain the packed LU
factors (unit lower triangle L, upper triangle U) and is overwritten by the
inverse of L*U. NOTE(review): row pivoting is not visible from this
routine; the permutation is presumably applied by the caller — confirm
against the public rmatrixluinverse driver.

INPUT PARAMETERS:
    A       -   matrix with packed LU factors; overwritten in-place
    Offs    -   offset of the submatrix inside A
    N       -   submatrix size; N<1 sets Info to -1 and returns immediately
    Work    -   real scratch vector
    Info    -   set to -1 on bad N; sub-calls may set -3 on an exactly-zero
                diagonal of U (see matinv_rmatrixtrinverserec); the routine
                aborts early whenever Info->val<=0 after a sub-call
    Rep     -   report structure, passed through to recursive calls
*************************************************************************/
static void matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
     ae_int_t offs,
     ae_int_t n,
     /* Real    */ ae_vector* work,
     sinteger* info,
     matinvreport* rep,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_int_t n1;
    ae_int_t n2;
    ae_int_t tsa;
    ae_int_t tsb;
    ae_int_t tscur;
    ae_int_t mn;


    if( n<1 )
    {
        info->val = -1;
        return;
    }

    /*
     * Tile size selection: large B-tile for big problems, small A-tile
     * once N fits into a B-tile.
     */
    tsa = matrixtilesizea(_state);
    tsb = matrixtilesizeb(_state);
    tscur = tsb;
    if( n<=tsb )
    {
        tscur = tsa;
    }

    /*
     * Try parallelism
     * (cost estimate ~8*N^3/6 flops must exceed the SMP activation level)
     */
    if( n>=2*tsb&&ae_fp_greater_eq((double)8/(double)6*rmul3((double)(n), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
    {
        if( _trypexec_matinv_rmatrixluinverserec(a,offs,n,work,info,rep, _state) )
        {
            return;
        }
    }

    /*
     * Base case
     */
    if( n<=tsa )
    {

        /*
         * Form inv(U)
         */
        matinv_rmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, info, _state);
        if( info->val<=0 )
        {
            return;
        }

        /*
         * Solve the equation inv(A)*L = inv(U) for inv(A).
         * Columns are processed right-to-left (LAPACK xGETRI style).
         */
        for(j=n-1; j>=0; j--)
        {

            /*
             * Copy current column of L to WORK and replace with zeros.
             */
            for(i=j+1; i<=n-1; i++)
            {
                work->ptr.p_double[i] = a->ptr.pp_double[offs+i][offs+j];
                a->ptr.pp_double[offs+i][offs+j] = (double)(0);
            }

            /*
             * Compute current column of inv(A).
             */
            if( j<n-1 )
            {
                for(i=0; i<=n-1; i++)
                {
                    v = ae_v_dotproduct(&a->ptr.pp_double[offs+i][offs+j+1], 1, &work->ptr.p_double[j+1], 1, ae_v_len(offs+j+1,offs+n-1));
                    a->ptr.pp_double[offs+i][offs+j] = a->ptr.pp_double[offs+i][offs+j]-v;
                }
            }
        }
        return;
    }

    /*
     * Recursive code:
     *
     *         ( L1      )   ( U1  U12 )
     * A    =  (         ) * (         )
     *         ( L12  L2 )   (     U2  )
     *
     *         ( W   X )
     * A^-1 =  (       )
     *         ( Y   Z )
     *
     * In-place calculation can be done as follows:
     * * X := inv(U1)*U12*inv(U2)
     * * Y := inv(L2)*L12*inv(L1)
     * * W := inv(L1*U1)+X*Y
     * * X := -X*inv(L2)
     * * Y := -inv(U2)*Y
     * * Z := inv(L2*U2)
     *
     * Reordering w.r.t. interdependencies gives us:
     *
     * * X := inv(U1)*U12      \ suitable for parallel execution
     * * Y := L12*inv(L1)      /
     *
     * * X := X*inv(U2)        \
     * * Y := inv(L2)*Y        | suitable for parallel execution
     * * W := inv(L1*U1)       /
     *
     * * W := W+X*Y
     *
     * * X := -X*inv(L2)       \ suitable for parallel execution
     * * Y := -inv(U2)*Y       /
     *
     * * Z := inv(L2*U2)
     */
    tiledsplit(n, tscur, &n1, &n2, _state);
    /* MN is not used below; touchint() marks it as intentionally unused */
    mn = imin2(n1, n2, _state);
    touchint(&mn, _state);
    ae_assert(n2>0, "LUInverseRec: internal error!", _state);

    /*
     * X := inv(U1)*U12
     * Y := L12*inv(L1)
     */
    rmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state);
    rmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state);

    /*
     * X := X*inv(U2)
     * Y := inv(L2)*Y
     * W := inv(L1*U1)
     */
    rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state);
    rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state);
    matinv_rmatrixluinverserec(a, offs, n1, work, info, rep, _state);
    if( info->val<=0 )
    {
        /* first diagonal block is singular — propagate failure */
        return;
    }

    /*
     * W := W+X*Y
     */
    rmatrixgemm(n1, n1, n2, 1.0, a, offs, offs+n1, 0, a, offs+n1, offs, 0, 1.0, a, offs, offs, _state);

    /*
     * X := -X*inv(L2)
     * Y := -inv(U2)*Y
     */
    rmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state);
    rmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state);
    for(i=0; i<=n1-1; i++)
    {
        ae_v_muld(&a->ptr.pp_double[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
    }
    for(i=0; i<=n2-1; i++)
    {
        ae_v_muld(&a->ptr.pp_double[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
    }

    /*
     * Z := inv(L2*U2)
     */
    matinv_rmatrixluinverserec(a, offs+n1, n2, work, info, rep, _state);
}
60573 
60574 
60575 /*************************************************************************
60576 Serial stub for GPL edition.
60577 *************************************************************************/
_trypexec_matinv_rmatrixluinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_vector * work,sinteger * info,matinvreport * rep,ae_state * _state)60578 ae_bool _trypexec_matinv_rmatrixluinverserec(/* Real    */ ae_matrix* a,
60579     ae_int_t offs,
60580     ae_int_t n,
60581     /* Real    */ ae_vector* work,
60582     sinteger* info,
60583     matinvreport* rep,
60584     ae_state *_state)
60585 {
60586     return ae_false;
60587 }
60588 
60589 
matinv_cmatrixluinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_vector * work,sinteger * ssinfo,matinvreport * rep,ae_state * _state)60590 static void matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
60591      ae_int_t offs,
60592      ae_int_t n,
60593      /* Complex */ ae_vector* work,
60594      sinteger* ssinfo,
60595      matinvreport* rep,
60596      ae_state *_state)
60597 {
60598     ae_int_t i;
60599     ae_int_t j;
60600     ae_complex v;
60601     ae_int_t n1;
60602     ae_int_t n2;
60603     ae_int_t mn;
60604     ae_int_t tsa;
60605     ae_int_t tsb;
60606     ae_int_t tscur;
60607 
60608 
60609     if( n<1 )
60610     {
60611         ssinfo->val = -1;
60612         return;
60613     }
60614     tsa = matrixtilesizea(_state)/2;
60615     tsb = matrixtilesizeb(_state);
60616     tscur = tsb;
60617     if( n<=tsb )
60618     {
60619         tscur = tsa;
60620     }
60621 
60622     /*
60623      * Try parallelism
60624      */
60625     if( n>=2*tsb&&ae_fp_greater_eq((double)32/(double)6*rmul3((double)(n), (double)(n), (double)(n), _state),smpactivationlevel(_state)) )
60626     {
60627         if( _trypexec_matinv_cmatrixluinverserec(a,offs,n,work,ssinfo,rep, _state) )
60628         {
60629             return;
60630         }
60631     }
60632 
60633     /*
60634      * Base case
60635      */
60636     if( n<=tsa )
60637     {
60638 
60639         /*
60640          * Form inv(U)
60641          */
60642         matinv_cmatrixtrinverserec(a, offs, n, ae_true, ae_false, work, ssinfo, _state);
60643         if( ssinfo->val<=0 )
60644         {
60645             return;
60646         }
60647 
60648         /*
60649          * Solve the equation inv(A)*L = inv(U) for inv(A).
60650          */
60651         for(j=n-1; j>=0; j--)
60652         {
60653 
60654             /*
60655              * Copy current column of L to WORK and replace with zeros.
60656              */
60657             for(i=j+1; i<=n-1; i++)
60658             {
60659                 work->ptr.p_complex[i] = a->ptr.pp_complex[offs+i][offs+j];
60660                 a->ptr.pp_complex[offs+i][offs+j] = ae_complex_from_i(0);
60661             }
60662 
60663             /*
60664              * Compute current column of inv(A).
60665              */
60666             if( j<n-1 )
60667             {
60668                 for(i=0; i<=n-1; i++)
60669                 {
60670                     v = ae_v_cdotproduct(&a->ptr.pp_complex[offs+i][offs+j+1], 1, "N", &work->ptr.p_complex[j+1], 1, "N", ae_v_len(offs+j+1,offs+n-1));
60671                     a->ptr.pp_complex[offs+i][offs+j] = ae_c_sub(a->ptr.pp_complex[offs+i][offs+j],v);
60672                 }
60673             }
60674         }
60675         return;
60676     }
60677 
60678     /*
60679      * Recursive code:
60680      *
60681      *         ( L1      )   ( U1  U12 )
60682      * A    =  (         ) * (         )
60683      *         ( L12  L2 )   (     U2  )
60684      *
60685      *         ( W   X )
60686      * A^-1 =  (       )
60687      *         ( Y   Z )
60688      *
60689      * In-place calculation can be done as follows:
60690      * * X := inv(U1)*U12*inv(U2)
60691      * * Y := inv(L2)*L12*inv(L1)
60692      * * W := inv(L1*U1)+X*Y
60693      * * X := -X*inv(L2)
60694      * * Y := -inv(U2)*Y
60695      * * Z := inv(L2*U2)
60696      *
60697      * Reordering w.r.t. interdependencies gives us:
60698      *
60699      * * X := inv(U1)*U12      \ suitable for parallel execution
60700      * * Y := L12*inv(L1)      /
60701      *
60702      * * X := X*inv(U2)        \
60703      * * Y := inv(L2)*Y        | suitable for parallel execution
60704      * * W := inv(L1*U1)       /
60705      *
60706      * * W := W+X*Y
60707      *
60708      * * X := -X*inv(L2)       \ suitable for parallel execution
60709      * * Y := -inv(U2)*Y       /
60710      *
60711      * * Z := inv(L2*U2)
60712      */
60713     tiledsplit(n, tscur, &n1, &n2, _state);
60714     mn = imin2(n1, n2, _state);
60715     touchint(&mn, _state);
60716     ae_assert(n2>0, "LUInverseRec: internal error!", _state);
60717 
60718     /*
60719      * X := inv(U1)*U12
60720      * Y := L12*inv(L1)
60721      */
60722     cmatrixlefttrsm(n1, n2, a, offs, offs, ae_true, ae_false, 0, a, offs, offs+n1, _state);
60723     cmatrixrighttrsm(n2, n1, a, offs, offs, ae_false, ae_true, 0, a, offs+n1, offs, _state);
60724 
60725     /*
60726      * X := X*inv(U2)
60727      * Y := inv(L2)*Y
60728      * W := inv(L1*U1)
60729      */
60730     cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs, offs+n1, _state);
60731     cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs+n1, offs, _state);
60732     matinv_cmatrixluinverserec(a, offs, n1, work, ssinfo, rep, _state);
60733     if( ssinfo->val<=0 )
60734     {
60735         return;
60736     }
60737 
60738     /*
60739      * W := W+X*Y
60740      */
60741     cmatrixgemm(n1, n1, n2, ae_complex_from_d(1.0), a, offs, offs+n1, 0, a, offs+n1, offs, 0, ae_complex_from_d(1.0), a, offs, offs, _state);
60742 
60743     /*
60744      * X := -X*inv(L2)
60745      * Y := -inv(U2)*Y
60746      */
60747     cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, ae_false, ae_true, 0, a, offs, offs+n1, _state);
60748     cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, ae_true, ae_false, 0, a, offs+n1, offs, _state);
60749     for(i=0; i<=n1-1; i++)
60750     {
60751         ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
60752     }
60753     for(i=0; i<=n2-1; i++)
60754     {
60755         ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
60756     }
60757 
60758     /*
60759      * Z := inv(L2*U2)
60760      */
60761     matinv_cmatrixluinverserec(a, offs+n1, n2, work, ssinfo, rep, _state);
60762 }
60763 
60764 
60765 /*************************************************************************
60766 Serial stub for GPL edition.
60767 *************************************************************************/
_trypexec_matinv_cmatrixluinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_vector * work,sinteger * ssinfo,matinvreport * rep,ae_state * _state)60768 ae_bool _trypexec_matinv_cmatrixluinverserec(/* Complex */ ae_matrix* a,
60769     ae_int_t offs,
60770     ae_int_t n,
60771     /* Complex */ ae_vector* work,
60772     sinteger* ssinfo,
60773     matinvreport* rep,
60774     ae_state *_state)
60775 {
60776     return ae_false;
60777 }
60778 
60779 
60780 /*************************************************************************
60781 Recursive subroutine for HPD inversion.
60782 
60783   -- ALGLIB routine --
60784      10.02.2010
60785      Bochkanov Sergey
60786 *************************************************************************/
matinv_hpdmatrixcholeskyinverserec(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_vector * tmp,ae_state * _state)60787 static void matinv_hpdmatrixcholeskyinverserec(/* Complex */ ae_matrix* a,
60788      ae_int_t offs,
60789      ae_int_t n,
60790      ae_bool isupper,
60791      /* Complex */ ae_vector* tmp,
60792      ae_state *_state)
60793 {
60794     ae_frame _frame_block;
60795     ae_int_t i;
60796     ae_int_t j;
60797     ae_complex v;
60798     ae_int_t n1;
60799     ae_int_t n2;
60800     sinteger sinfo;
60801     ae_int_t tsa;
60802     ae_int_t tsb;
60803     ae_int_t tscur;
60804 
60805     ae_frame_make(_state, &_frame_block);
60806     memset(&sinfo, 0, sizeof(sinfo));
60807     _sinteger_init(&sinfo, _state, ae_true);
60808 
60809     if( n<1 )
60810     {
60811         ae_frame_leave(_state);
60812         return;
60813     }
60814     tsa = matrixtilesizea(_state)/2;
60815     tsb = matrixtilesizeb(_state);
60816     tscur = tsb;
60817     if( n<=tsb )
60818     {
60819         tscur = tsa;
60820     }
60821 
60822     /*
60823      * Base case
60824      */
60825     if( n<=tsa )
60826     {
60827         sinfo.val = 1;
60828         matinv_cmatrixtrinverserec(a, offs, n, isupper, ae_false, tmp, &sinfo, _state);
60829         ae_assert(sinfo.val>0, "HPDMatrixCholeskyInverseRec: integrity check failed", _state);
60830         if( isupper )
60831         {
60832 
60833             /*
60834              * Compute the product U * U'.
60835              * NOTE: we never assume that diagonal of U is real
60836              */
60837             for(i=0; i<=n-1; i++)
60838             {
60839                 if( i==0 )
60840                 {
60841 
60842                     /*
60843                      * 1x1 matrix
60844                      */
60845                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60846                 }
60847                 else
60848                 {
60849 
60850                     /*
60851                      * (I+1)x(I+1) matrix,
60852                      *
60853                      * ( A11  A12 )   ( A11^H        )   ( A11*A11^H+A12*A12^H  A12*A22^H )
60854                      * (          ) * (              ) = (                                )
60855                      * (      A22 )   ( A12^H  A22^H )   ( A22*A12^H            A22*A22^H )
60856                      *
60857                      * A11 is IxI, A22 is 1x1.
60858                      */
60859                     ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs][offs+i], a->stride, "Conj", ae_v_len(0,i-1));
60860                     for(j=0; j<=i-1; j++)
60861                     {
60862                         v = a->ptr.pp_complex[offs+j][offs+i];
60863                         ae_v_caddc(&a->ptr.pp_complex[offs+j][offs+j], 1, &tmp->ptr.p_complex[j], 1, "N", ae_v_len(offs+j,offs+i-1), v);
60864                     }
60865                     v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state);
60866                     ae_v_cmulc(&a->ptr.pp_complex[offs][offs+i], a->stride, ae_v_len(offs,offs+i-1), v);
60867                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60868                 }
60869             }
60870         }
60871         else
60872         {
60873 
60874             /*
60875              * Compute the product L' * L
60876              * NOTE: we never assume that diagonal of L is real
60877              */
60878             for(i=0; i<=n-1; i++)
60879             {
60880                 if( i==0 )
60881                 {
60882 
60883                     /*
60884                      * 1x1 matrix
60885                      */
60886                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60887                 }
60888                 else
60889                 {
60890 
60891                     /*
60892                      * (I+1)x(I+1) matrix,
60893                      *
60894                      * ( A11^H  A21^H )   ( A11      )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
60895                      * (              ) * (          ) = (                                )
60896                      * (        A22^H )   ( A21  A22 )   ( A22^H*A21            A22^H*A22 )
60897                      *
60898                      * A11 is IxI, A22 is 1x1.
60899                      */
60900                     ae_v_cmove(&tmp->ptr.p_complex[0], 1, &a->ptr.pp_complex[offs+i][offs], 1, "N", ae_v_len(0,i-1));
60901                     for(j=0; j<=i-1; j++)
60902                     {
60903                         v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+j], _state);
60904                         ae_v_caddc(&a->ptr.pp_complex[offs+j][offs], 1, &tmp->ptr.p_complex[0], 1, "N", ae_v_len(offs,offs+j), v);
60905                     }
60906                     v = ae_c_conj(a->ptr.pp_complex[offs+i][offs+i], _state);
60907                     ae_v_cmulc(&a->ptr.pp_complex[offs+i][offs], 1, ae_v_len(offs,offs+i-1), v);
60908                     a->ptr.pp_complex[offs+i][offs+i] = ae_complex_from_d(ae_sqr(a->ptr.pp_complex[offs+i][offs+i].x, _state)+ae_sqr(a->ptr.pp_complex[offs+i][offs+i].y, _state));
60909                 }
60910             }
60911         }
60912         ae_frame_leave(_state);
60913         return;
60914     }
60915 
60916     /*
60917      * Recursive code: triangular factor inversion merged with
60918      * UU' or L'L multiplication
60919      */
60920     tiledsplit(n, tscur, &n1, &n2, _state);
60921 
60922     /*
60923      * form off-diagonal block of trangular inverse
60924      */
60925     if( isupper )
60926     {
60927         for(i=0; i<=n1-1; i++)
60928         {
60929             ae_v_cmuld(&a->ptr.pp_complex[offs+i][offs+n1], 1, ae_v_len(offs+n1,offs+n-1), -1);
60930         }
60931         cmatrixlefttrsm(n1, n2, a, offs, offs, isupper, ae_false, 0, a, offs, offs+n1, _state);
60932         cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs, offs+n1, _state);
60933     }
60934     else
60935     {
60936         for(i=0; i<=n2-1; i++)
60937         {
60938             ae_v_cmuld(&a->ptr.pp_complex[offs+n1+i][offs], 1, ae_v_len(offs,offs+n1-1), -1);
60939         }
60940         cmatrixrighttrsm(n2, n1, a, offs, offs, isupper, ae_false, 0, a, offs+n1, offs, _state);
60941         cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 0, a, offs+n1, offs, _state);
60942     }
60943 
60944     /*
60945      * invert first diagonal block
60946      */
60947     matinv_hpdmatrixcholeskyinverserec(a, offs, n1, isupper, tmp, _state);
60948 
60949     /*
60950      * update first diagonal block with off-diagonal block,
60951      * update off-diagonal block
60952      */
60953     if( isupper )
60954     {
60955         cmatrixherk(n1, n2, 1.0, a, offs, offs+n1, 0, 1.0, a, offs, offs, isupper, _state);
60956         cmatrixrighttrsm(n1, n2, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs, offs+n1, _state);
60957     }
60958     else
60959     {
60960         cmatrixherk(n1, n2, 1.0, a, offs+n1, offs, 2, 1.0, a, offs, offs, isupper, _state);
60961         cmatrixlefttrsm(n2, n1, a, offs+n1, offs+n1, isupper, ae_false, 2, a, offs+n1, offs, _state);
60962     }
60963 
60964     /*
60965      * invert second diagonal block
60966      */
60967     matinv_hpdmatrixcholeskyinverserec(a, offs+n1, n2, isupper, tmp, _state);
60968     ae_frame_leave(_state);
60969 }
60970 
60971 
_matinvreport_init(void * _p,ae_state * _state,ae_bool make_automatic)60972 void _matinvreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
60973 {
60974     matinvreport *p = (matinvreport*)_p;
60975     ae_touch_ptr((void*)p);
60976 }
60977 
60978 
_matinvreport_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)60979 void _matinvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
60980 {
60981     matinvreport *dst = (matinvreport*)_dst;
60982     matinvreport *src = (matinvreport*)_src;
60983     dst->r1 = src->r1;
60984     dst->rinf = src->rinf;
60985 }
60986 
60987 
_matinvreport_clear(void * _p)60988 void _matinvreport_clear(void* _p)
60989 {
60990     matinvreport *p = (matinvreport*)_p;
60991     ae_touch_ptr((void*)p);
60992 }
60993 
60994 
_matinvreport_destroy(void * _p)60995 void _matinvreport_destroy(void* _p)
60996 {
60997     matinvreport *p = (matinvreport*)_p;
60998     ae_touch_ptr((void*)p);
60999 }
61000 
61001 
61002 #endif
61003 #if defined(AE_COMPILE_INVERSEUPDATE) || !defined(AE_PARTIAL_BUILD)
61004 
61005 
61006 /*************************************************************************
61007 Inverse matrix update by the Sherman-Morrison formula
61008 
61009 The algorithm updates matrix A^-1 when adding a number to an element
61010 of matrix A.
61011 
61012 Input parameters:
61013     InvA    -   inverse of matrix A.
61014                 Array whose indexes range within [0..N-1, 0..N-1].
61015     N       -   size of matrix A.
61016     UpdRow  -   row where the element to be updated is stored.
61017     UpdColumn - column where the element to be updated is stored.
61018     UpdVal  -   a number to be added to the element.
61019 
61020 
61021 Output parameters:
61022     InvA    -   inverse of modified matrix A.
61023 
61024   -- ALGLIB --
61025      Copyright 2005 by Bochkanov Sergey
61026 *************************************************************************/
rmatrixinvupdatesimple(ae_matrix * inva,ae_int_t n,ae_int_t updrow,ae_int_t updcolumn,double updval,ae_state * _state)61027 void rmatrixinvupdatesimple(/* Real    */ ae_matrix* inva,
61028      ae_int_t n,
61029      ae_int_t updrow,
61030      ae_int_t updcolumn,
61031      double updval,
61032      ae_state *_state)
61033 {
61034     ae_frame _frame_block;
61035     ae_vector t1;
61036     ae_vector t2;
61037     ae_int_t i;
61038     double lambdav;
61039     double vt;
61040 
61041     ae_frame_make(_state, &_frame_block);
61042     memset(&t1, 0, sizeof(t1));
61043     memset(&t2, 0, sizeof(t2));
61044     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61045     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61046 
61047     ae_assert(updrow>=0&&updrow<n, "RMatrixInvUpdateSimple: incorrect UpdRow!", _state);
61048     ae_assert(updcolumn>=0&&updcolumn<n, "RMatrixInvUpdateSimple: incorrect UpdColumn!", _state);
61049     ae_vector_set_length(&t1, n-1+1, _state);
61050     ae_vector_set_length(&t2, n-1+1, _state);
61051 
61052     /*
61053      * T1 = InvA * U
61054      */
61055     ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1));
61056 
61057     /*
61058      * T2 = v*InvA
61059      */
61060     ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1));
61061 
61062     /*
61063      * Lambda = v * InvA * U
61064      */
61065     lambdav = updval*inva->ptr.pp_double[updcolumn][updrow];
61066 
61067     /*
61068      * InvA = InvA - correction
61069      */
61070     for(i=0; i<=n-1; i++)
61071     {
61072         vt = updval*t1.ptr.p_double[i];
61073         vt = vt/(1+lambdav);
61074         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61075     }
61076     ae_frame_leave(_state);
61077 }
61078 
61079 
61080 /*************************************************************************
61081 Inverse matrix update by the Sherman-Morrison formula
61082 
61083 The algorithm updates matrix A^-1 when adding a vector to a row
61084 of matrix A.
61085 
61086 Input parameters:
61087     InvA    -   inverse of matrix A.
61088                 Array whose indexes range within [0..N-1, 0..N-1].
61089     N       -   size of matrix A.
61090     UpdRow  -   the row of A whose vector V was added.
61091                 0 <= Row <= N-1
61092     V       -   the vector to be added to a row.
61093                 Array whose index ranges within [0..N-1].
61094 
61095 Output parameters:
61096     InvA    -   inverse of modified matrix A.
61097 
61098   -- ALGLIB --
61099      Copyright 2005 by Bochkanov Sergey
61100 *************************************************************************/
rmatrixinvupdaterow(ae_matrix * inva,ae_int_t n,ae_int_t updrow,ae_vector * v,ae_state * _state)61101 void rmatrixinvupdaterow(/* Real    */ ae_matrix* inva,
61102      ae_int_t n,
61103      ae_int_t updrow,
61104      /* Real    */ ae_vector* v,
61105      ae_state *_state)
61106 {
61107     ae_frame _frame_block;
61108     ae_vector t1;
61109     ae_vector t2;
61110     ae_int_t i;
61111     ae_int_t j;
61112     double lambdav;
61113     double vt;
61114 
61115     ae_frame_make(_state, &_frame_block);
61116     memset(&t1, 0, sizeof(t1));
61117     memset(&t2, 0, sizeof(t2));
61118     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61119     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61120 
61121     ae_vector_set_length(&t1, n-1+1, _state);
61122     ae_vector_set_length(&t2, n-1+1, _state);
61123 
61124     /*
61125      * T1 = InvA * U
61126      */
61127     ae_v_move(&t1.ptr.p_double[0], 1, &inva->ptr.pp_double[0][updrow], inva->stride, ae_v_len(0,n-1));
61128 
61129     /*
61130      * T2 = v*InvA
61131      * Lambda = v * InvA * U
61132      */
61133     for(j=0; j<=n-1; j++)
61134     {
61135         vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1));
61136         t2.ptr.p_double[j] = vt;
61137     }
61138     lambdav = t2.ptr.p_double[updrow];
61139 
61140     /*
61141      * InvA = InvA - correction
61142      */
61143     for(i=0; i<=n-1; i++)
61144     {
61145         vt = t1.ptr.p_double[i]/(1+lambdav);
61146         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61147     }
61148     ae_frame_leave(_state);
61149 }
61150 
61151 
61152 /*************************************************************************
61153 Inverse matrix update by the Sherman-Morrison formula
61154 
61155 The algorithm updates matrix A^-1 when adding a vector to a column
61156 of matrix A.
61157 
61158 Input parameters:
61159     InvA        -   inverse of matrix A.
61160                     Array whose indexes range within [0..N-1, 0..N-1].
61161     N           -   size of matrix A.
61162     UpdColumn   -   the column of A whose vector U was added.
61163                     0 <= UpdColumn <= N-1
61164     U           -   the vector to be added to a column.
61165                     Array whose index ranges within [0..N-1].
61166 
61167 Output parameters:
61168     InvA        -   inverse of modified matrix A.
61169 
61170   -- ALGLIB --
61171      Copyright 2005 by Bochkanov Sergey
61172 *************************************************************************/
rmatrixinvupdatecolumn(ae_matrix * inva,ae_int_t n,ae_int_t updcolumn,ae_vector * u,ae_state * _state)61173 void rmatrixinvupdatecolumn(/* Real    */ ae_matrix* inva,
61174      ae_int_t n,
61175      ae_int_t updcolumn,
61176      /* Real    */ ae_vector* u,
61177      ae_state *_state)
61178 {
61179     ae_frame _frame_block;
61180     ae_vector t1;
61181     ae_vector t2;
61182     ae_int_t i;
61183     double lambdav;
61184     double vt;
61185 
61186     ae_frame_make(_state, &_frame_block);
61187     memset(&t1, 0, sizeof(t1));
61188     memset(&t2, 0, sizeof(t2));
61189     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61190     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61191 
61192     ae_vector_set_length(&t1, n-1+1, _state);
61193     ae_vector_set_length(&t2, n-1+1, _state);
61194 
61195     /*
61196      * T1 = InvA * U
61197      * Lambda = v * InvA * U
61198      */
61199     for(i=0; i<=n-1; i++)
61200     {
61201         vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1));
61202         t1.ptr.p_double[i] = vt;
61203     }
61204     lambdav = t1.ptr.p_double[updcolumn];
61205 
61206     /*
61207      * T2 = v*InvA
61208      */
61209     ae_v_move(&t2.ptr.p_double[0], 1, &inva->ptr.pp_double[updcolumn][0], 1, ae_v_len(0,n-1));
61210 
61211     /*
61212      * InvA = InvA - correction
61213      */
61214     for(i=0; i<=n-1; i++)
61215     {
61216         vt = t1.ptr.p_double[i]/(1+lambdav);
61217         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61218     }
61219     ae_frame_leave(_state);
61220 }
61221 
61222 
61223 /*************************************************************************
61224 Inverse matrix update by the Sherman-Morrison formula
61225 
61226 The algorithm computes the inverse of matrix A+u*v' by using the given matrix
61227 A^-1 and the vectors u and v.
61228 
61229 Input parameters:
61230     InvA    -   inverse of matrix A.
61231                 Array whose indexes range within [0..N-1, 0..N-1].
61232     N       -   size of matrix A.
61233     U       -   the vector modifying the matrix.
61234                 Array whose index ranges within [0..N-1].
61235     V       -   the vector modifying the matrix.
61236                 Array whose index ranges within [0..N-1].
61237 
61238 Output parameters:
61239     InvA - inverse of matrix A + u*v'.
61240 
61241   -- ALGLIB --
61242      Copyright 2005 by Bochkanov Sergey
61243 *************************************************************************/
rmatrixinvupdateuv(ae_matrix * inva,ae_int_t n,ae_vector * u,ae_vector * v,ae_state * _state)61244 void rmatrixinvupdateuv(/* Real    */ ae_matrix* inva,
61245      ae_int_t n,
61246      /* Real    */ ae_vector* u,
61247      /* Real    */ ae_vector* v,
61248      ae_state *_state)
61249 {
61250     ae_frame _frame_block;
61251     ae_vector t1;
61252     ae_vector t2;
61253     ae_int_t i;
61254     ae_int_t j;
61255     double lambdav;
61256     double vt;
61257 
61258     ae_frame_make(_state, &_frame_block);
61259     memset(&t1, 0, sizeof(t1));
61260     memset(&t2, 0, sizeof(t2));
61261     ae_vector_init(&t1, 0, DT_REAL, _state, ae_true);
61262     ae_vector_init(&t2, 0, DT_REAL, _state, ae_true);
61263 
61264     ae_vector_set_length(&t1, n-1+1, _state);
61265     ae_vector_set_length(&t2, n-1+1, _state);
61266 
61267     /*
61268      * T1 = InvA * U
61269      * Lambda = v * T1
61270      */
61271     for(i=0; i<=n-1; i++)
61272     {
61273         vt = ae_v_dotproduct(&inva->ptr.pp_double[i][0], 1, &u->ptr.p_double[0], 1, ae_v_len(0,n-1));
61274         t1.ptr.p_double[i] = vt;
61275     }
61276     lambdav = ae_v_dotproduct(&v->ptr.p_double[0], 1, &t1.ptr.p_double[0], 1, ae_v_len(0,n-1));
61277 
61278     /*
61279      * T2 = v*InvA
61280      */
61281     for(j=0; j<=n-1; j++)
61282     {
61283         vt = ae_v_dotproduct(&v->ptr.p_double[0], 1, &inva->ptr.pp_double[0][j], inva->stride, ae_v_len(0,n-1));
61284         t2.ptr.p_double[j] = vt;
61285     }
61286 
61287     /*
61288      * InvA = InvA - correction
61289      */
61290     for(i=0; i<=n-1; i++)
61291     {
61292         vt = t1.ptr.p_double[i]/(1+lambdav);
61293         ae_v_subd(&inva->ptr.pp_double[i][0], 1, &t2.ptr.p_double[0], 1, ae_v_len(0,n-1), vt);
61294     }
61295     ae_frame_leave(_state);
61296 }
61297 
61298 
61299 #endif
61300 #if defined(AE_COMPILE_SCHUR) || !defined(AE_PARTIAL_BUILD)
61301 
61302 
61303 /*************************************************************************
61304 Subroutine performing the Schur decomposition of a general matrix by using
61305 the QR algorithm with multiple shifts.
61306 
61307 COMMERCIAL EDITION OF ALGLIB:
61308 
61309   ! Commercial version of ALGLIB includes one  important  improvement   of
61310   ! this function, which can be used from C++ and C#:
61311   ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
61312   !
61313   ! Intel MKL gives approximately constant  (with  respect  to  number  of
61314   ! worker threads) acceleration factor which depends on CPU  being  used,
61315   ! problem  size  and  "baseline"  ALGLIB  edition  which  is  used   for
61316   ! comparison.
61317   !
61318   ! Multithreaded acceleration is NOT supported for this function.
61319   !
61320   ! We recommend you to read 'Working with commercial version' section  of
61321   ! ALGLIB Reference Manual in order to find out how to  use  performance-
61322   ! related features provided by commercial edition of ALGLIB.
61323 
61324 The source matrix A is represented as S'*A*S = T, where S is an orthogonal
61325 matrix (Schur vectors), T - upper quasi-triangular matrix (with blocks of
61326 sizes 1x1 and 2x2 on the main diagonal).
61327 
61328 Input parameters:
61329     A   -   matrix to be decomposed.
61330             Array whose indexes range within [0..N-1, 0..N-1].
61331     N   -   size of A, N>=0.
61332 
61333 
61334 Output parameters:
61335     A   -   contains matrix T.
61336             Array whose indexes range within [0..N-1, 0..N-1].
61337     S   -   contains Schur vectors.
61338             Array whose indexes range within [0..N-1, 0..N-1].
61339 
61340 Note 1:
61341     The block structure of matrix T can be easily recognized: since all
61342     the elements below the blocks are zeros, the elements a[i+1,i] which
61343     are equal to 0 show the block border.
61344 
61345 Note 2:
61346     The algorithm performance depends on the value of the internal parameter
61347     NS of the InternalSchurDecomposition subroutine which defines the number
61348     of shifts in the QR algorithm (similarly to the block width in block-matrix
61349     algorithms in linear algebra). If you require maximum performance on
61350     your machine, it is recommended to adjust this parameter manually.
61351 
61352 Result:
61353     True,
61354         if the algorithm has converged and parameters A and S contain the result.
61355     False,
61356         if the algorithm has not converged.
61357 
61358 Algorithm implemented on the basis of the DHSEQR subroutine (LAPACK 3.0 library).
61359 *************************************************************************/
rmatrixschur(ae_matrix * a,ae_int_t n,ae_matrix * s,ae_state * _state)61360 ae_bool rmatrixschur(/* Real    */ ae_matrix* a,
61361      ae_int_t n,
61362      /* Real    */ ae_matrix* s,
61363      ae_state *_state)
61364 {
61365     ae_frame _frame_block;
61366     ae_vector tau;
61367     ae_vector wi;
61368     ae_vector wr;
61369     ae_int_t info;
61370     ae_bool result;
61371 
61372     ae_frame_make(_state, &_frame_block);
61373     memset(&tau, 0, sizeof(tau));
61374     memset(&wi, 0, sizeof(wi));
61375     memset(&wr, 0, sizeof(wr));
61376     ae_matrix_clear(s);
61377     ae_vector_init(&tau, 0, DT_REAL, _state, ae_true);
61378     ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
61379     ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
61380 
61381 
61382     /*
61383      * Upper Hessenberg form of the 0-based matrix
61384      */
61385     rmatrixhessenberg(a, n, &tau, _state);
61386     rmatrixhessenbergunpackq(a, n, &tau, s, _state);
61387 
61388     /*
61389      * Schur decomposition
61390      */
61391     rmatrixinternalschurdecomposition(a, n, 1, 1, &wr, &wi, s, &info, _state);
61392     result = info==0;
61393     ae_frame_leave(_state);
61394     return result;
61395 }
61396 
61397 
61398 #endif
61399 #if defined(AE_COMPILE_SPDGEVD) || !defined(AE_PARTIAL_BUILD)
61400 
61401 
61402 /*************************************************************************
61403 Algorithm for solving the following generalized symmetric positive-definite
61404 eigenproblem:
61405     A*x = lambda*B*x (1) or
61406     A*B*x = lambda*x (2) or
61407     B*A*x = lambda*x (3).
61408 where A is a symmetric matrix, B - symmetric positive-definite matrix.
61409 The problem is solved by reducing it to an ordinary  symmetric  eigenvalue
61410 problem.
61411 
61412 Input parameters:
61413     A           -   symmetric matrix which is given by its upper or lower
61414                     triangular part.
61415                     Array whose indexes range within [0..N-1, 0..N-1].
61416     N           -   size of matrices A and B.
61417     IsUpperA    -   storage format of matrix A.
61418     B           -   symmetric positive-definite matrix which is given by
61419                     its upper or lower triangular part.
61420                     Array whose indexes range within [0..N-1, 0..N-1].
61421     IsUpperB    -   storage format of matrix B.
61422     ZNeeded     -   if ZNeeded is equal to:
61423                      * 0, the eigenvectors are not returned;
61424                      * 1, the eigenvectors are returned.
61425     ProblemType -   if ProblemType is equal to:
61426                      * 1, the following problem is solved: A*x = lambda*B*x;
61427                      * 2, the following problem is solved: A*B*x = lambda*x;
61428                      * 3, the following problem is solved: B*A*x = lambda*x.
61429 
61430 Output parameters:
61431     D           -   eigenvalues in ascending order.
61432                     Array whose index ranges within [0..N-1].
61433     Z           -   if ZNeeded is equal to:
61434                      * 0, Z hasn't changed;
61435                      * 1, Z contains eigenvectors.
61436                     Array whose indexes range within [0..N-1, 0..N-1].
61437                     The eigenvectors are stored in matrix columns. It should
61438                     be noted that the eigenvectors in such problems do not
61439                     form an orthogonal system.
61440 
61441 Result:
61442     True, if the problem was solved successfully.
61443     False, if the error occurred during the Cholesky decomposition of matrix
61444     B (the matrix isn't positive-definite) or during the work of the iterative
61445     algorithm for solving the symmetric eigenproblem.
61446 
61447 See also the GeneralizedSymmetricDefiniteEVDReduce subroutine.
61448 
61449   -- ALGLIB --
61450      Copyright 1.28.2006 by Bochkanov Sergey
61451 *************************************************************************/
smatrixgevd(ae_matrix * a,ae_int_t n,ae_bool isuppera,ae_matrix * b,ae_bool isupperb,ae_int_t zneeded,ae_int_t problemtype,ae_vector * d,ae_matrix * z,ae_state * _state)61452 ae_bool smatrixgevd(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isuppera,
     /* Real    */ ae_matrix* b,
     ae_bool isupperb,
     ae_int_t zneeded,
     ae_int_t problemtype,
     /* Real    */ ae_vector* d,
     /* Real    */ ae_matrix* z,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix _a;
    ae_matrix r;
    ae_matrix t;
    ae_bool isupperr;
    ae_int_t j1;
    ae_int_t j2;
    ae_int_t j1inc;
    ae_int_t j2inc;
    ae_int_t i;
    ae_int_t j;
    double v;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&_a, 0, sizeof(_a));
    memset(&r, 0, sizeof(r));
    memset(&t, 0, sizeof(t));
    /* A is modified by the reduction below: work on a private copy so
       the caller's matrix stays intact */
    ae_matrix_init_copy(&_a, a, _state, ae_true);
    a = &_a;
    ae_vector_clear(d);
    ae_matrix_clear(z);
    ae_matrix_init(&r, 0, 0, DT_REAL, _state, ae_true);
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);


    /*
     * Reduce to the standard symmetric problem C*y=lambda*y and solve it;
     * R is the triangular back-transformation matrix (x = R*y) and
     * IsUpperR reports which triangle of R is stored.
     */
    result = smatrixgevdreduce(a, n, isuppera, b, isupperb, problemtype, &r, &isupperr, _state);
    if( !result )
    {
        ae_frame_leave(_state);
        return result;
    }
    result = smatrixevd(a, n, zneeded, isuppera, d, &t, _state);
    if( !result )
    {
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Transform eigenvectors if needed
     */
    if( zneeded!=0 )
    {

        /*
         * fill Z with zeros: zero out row 0, then replicate it into
         * rows 1..N-1
         */
        ae_matrix_set_length(z, n-1+1, n-1+1, _state);
        for(j=0; j<=n-1; j++)
        {
            z->ptr.pp_double[0][j] = 0.0;
        }
        for(i=1; i<=n-1; i++)
        {
            ae_v_move(&z->ptr.pp_double[i][0], 1, &z->ptr.pp_double[0][0], 1, ae_v_len(0,n-1));
        }

        /*
         * Setup R properties: [J1..J2] tracks the nonzero column span of
         * row I of triangular R (upper: [I..N-1], lower: [0..I])
         */
        if( isupperr )
        {
            j1 = 0;
            j2 = n-1;
            j1inc = 1;
            j2inc = 0;
        }
        else
        {
            j1 = 0;
            j2 = 0;
            j1inc = 0;
            j2inc = 1;
        }

        /*
         * Calculate Z = R*T, touching only the nonzero span of each row of R
         */
        for(i=0; i<=n-1; i++)
        {
            for(j=j1; j<=j2; j++)
            {
                v = r.ptr.pp_double[i][j];
                ae_v_addd(&z->ptr.pp_double[i][0], 1, &t.ptr.pp_double[j][0], 1, ae_v_len(0,n-1), v);
            }
            j1 = j1+j1inc;
            j2 = j2+j2inc;
        }
    }
    ae_frame_leave(_state);
    return result;
}
61559 
61560 
61561 /*************************************************************************
61562 Algorithm for reduction of the following generalized symmetric positive-
61563 definite eigenvalue problem:
61564     A*x = lambda*B*x (1) or
61565     A*B*x = lambda*x (2) or
61566     B*A*x = lambda*x (3)
61567 to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
61568 the given problems are the same, and the eigenvectors of the given problem
61569 could be obtained by multiplying the obtained eigenvectors by the
61570 transformation matrix x = R*y).
61571 
61572 Here A is a symmetric matrix, B - symmetric positive-definite matrix.
61573 
61574 Input parameters:
61575     A           -   symmetric matrix which is given by its upper or lower
61576                     triangular part.
61577                     Array whose indexes range within [0..N-1, 0..N-1].
61578     N           -   size of matrices A and B.
61579     IsUpperA    -   storage format of matrix A.
61580     B           -   symmetric positive-definite matrix which is given by
61581                     its upper or lower triangular part.
61582                     Array whose indexes range within [0..N-1, 0..N-1].
61583     IsUpperB    -   storage format of matrix B.
61584     ProblemType -   if ProblemType is equal to:
61585                      * 1, the following problem is solved: A*x = lambda*B*x;
61586                      * 2, the following problem is solved: A*B*x = lambda*x;
61587                      * 3, the following problem is solved: B*A*x = lambda*x.
61588 
61589 Output parameters:
61590     A           -   symmetric matrix which is given by its upper or lower
61591                     triangle depending on IsUpperA. Contains matrix C.
61592                     Array whose indexes range within [0..N-1, 0..N-1].
    R           -   upper triangular or lower triangular transformation matrix
61594                     which is used to obtain the eigenvectors of a given problem
61595                     as the product of eigenvectors of C (from the right) and
61596                     matrix R (from the left). If the matrix is upper
61597                     triangular, the elements below the main diagonal
61598                     are equal to 0 (and vice versa). Thus, we can perform
61599                     the multiplication without taking into account the
61600                     internal structure (which is an easier though less
61601                     effective way).
61602                     Array whose indexes range within [0..N-1, 0..N-1].
61603     IsUpperR    -   type of matrix R (upper or lower triangular).
61604 
61605 Result:
61606     True, if the problem was reduced successfully.
61607     False, if the error occurred during the Cholesky decomposition of
61608         matrix B (the matrix is not positive-definite).
61609 
61610   -- ALGLIB --
61611      Copyright 1.28.2006 by Bochkanov Sergey
61612 *************************************************************************/
ae_bool smatrixgevdreduce(/* Real    */ ae_matrix* a,
     ae_int_t n,
     ae_bool isuppera,
     /* Real    */ ae_matrix* b,
     ae_bool isupperb,
     ae_int_t problemtype,
     /* Real    */ ae_matrix* r,
     ae_bool* isupperr,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix t;                 /* Cholesky factor of B; later overwritten by its inverse */
    ae_vector w1;                /* workspace vectors; note: used with 1-based indexing    */
    ae_vector w2;
    ae_vector w3;
    ae_int_t i;
    ae_int_t j;
    double v;
    matinvreport rep;
    ae_int_t info;
    ae_bool result;

    ae_frame_make(_state, &_frame_block);
    memset(&t, 0, sizeof(t));
    memset(&w1, 0, sizeof(w1));
    memset(&w2, 0, sizeof(w2));
    memset(&w3, 0, sizeof(w3));
    memset(&rep, 0, sizeof(rep));
    ae_matrix_clear(r);
    *isupperr = ae_false;
    ae_matrix_init(&t, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w1, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w2, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&w3, 0, DT_REAL, _state, ae_true);
    _matinvreport_init(&rep, _state, ae_true);

    ae_assert(n>0, "SMatrixGEVDReduce: N<=0!", _state);
    ae_assert((problemtype==1||problemtype==2)||problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!", _state);
    result = ae_true;

    /*
     * Problem 1:  A*x = lambda*B*x
     *
     * Reducing to:
     *     C*y = lambda*y
     *     C = L^(-1) * A * L^(-T)
     *     x = L^(-T) * y
     */
    if( problemtype==1 )
    {

        /*
         * Factorize B in T: B = LL'
         */
        ae_matrix_set_length(&t, n-1+1, n-1+1, _state);
        if( isupperb )
        {
            /* B is given by its upper triangle: transpose it into T's lower
             * triangle (source advances along row i, destination advances
             * down column i via t.stride). */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][i], t.stride, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
        else
        {
            /* B is already stored as a lower triangle: copy rows directly. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][0], 1, &b->ptr.pp_double[i][0], 1, ae_v_len(0,i));
            }
        }
        /* Lower-triangular Cholesky; failure means B is not positive-definite. */
        if( !spdmatrixcholesky(&t, n, ae_false, _state) )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Invert L in T
         */
        rmatrixtrinverse(&t, n, ae_false, ae_false, &info, &rep, _state);
        if( info<=0 )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Build L^(-1) * A * L^(-T) in R
         *
         * Column-by-column: for each j, compute A times the j-th column of
         * L^(-T) (split into a symmetric part and a rectangular part), then
         * dot it against the rows of L^(-1). Workspaces are 1-based.
         */
        ae_vector_set_length(&w1, n+1, _state);
        ae_vector_set_length(&w2, n+1, _state);
        ae_matrix_set_length(r, n-1+1, n-1+1, _state);
        for(j=1; j<=n; j++)
        {

            /*
             * Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T))
             */
            ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][0], 1, ae_v_len(1,j));
            symmetricmatrixvectormultiply(a, isuppera, 0, j-1, &w1, 1.0, &w2, _state);
            if( isuppera )
            {
                matrixvectormultiply(a, 0, j-1, j, n-1, ae_true, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state);
            }
            else
            {
                matrixvectormultiply(a, j, n-1, 0, j-1, ae_false, &w1, 1, j, 1.0, &w2, j+1, n, 0.0, _state);
            }

            /*
             * Form l(i)*w2 (here l(i) is i-th row of L^(-1))
             */
            for(i=1; i<=n; i++)
            {
                v = ae_v_dotproduct(&t.ptr.pp_double[i-1][0], 1, &w2.ptr.p_double[1], 1, ae_v_len(0,i-1));
                r->ptr.pp_double[i-1][j-1] = v;
            }
        }

        /*
         * Copy R to A
         */
        for(i=0; i<=n-1; i++)
        {
            ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
        }

        /*
         * Copy L^(-1) from T to R and transpose
         */
        *isupperr = ae_true;
        for(i=0; i<=n-1; i++)
        {
            for(j=0; j<=i-1; j++)
            {
                r->ptr.pp_double[i][j] = (double)(0);
            }
        }
        /* Transposing copy: row i of R receives column i of T (t.stride). */
        for(i=0; i<=n-1; i++)
        {
            ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], t.stride, ae_v_len(i,n-1));
        }
        ae_frame_leave(_state);
        return result;
    }

    /*
     * Problem 2:  A*B*x = lambda*x
     * or
     * problem 3:  B*A*x = lambda*x
     *
     * Reducing to:
     *     C*y = lambda*y
     *     C = U * A * U'
     *     B = U'* U
     */
    if( problemtype==2||problemtype==3 )
    {

        /*
         * Factorize B in T: B = U'*U
         */
        ae_matrix_set_length(&t, n-1+1, n-1+1, _state);
        if( isupperb )
        {
            /* B given by its upper triangle: copy rows as-is. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
        else
        {
            /* B given by its lower triangle: transpose (walk down column i of
             * B via b->stride) into T's upper triangle. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&t.ptr.pp_double[i][i], 1, &b->ptr.pp_double[i][i], b->stride, ae_v_len(i,n-1));
            }
        }
        /* Upper-triangular Cholesky; failure means B is not positive-definite. */
        if( !spdmatrixcholesky(&t, n, ae_true, _state) )
        {
            result = ae_false;
            ae_frame_leave(_state);
            return result;
        }

        /*
         * Build U * A * U' in R
         *
         * Same column-at-a-time scheme as above, with U in place of L^(-1);
         * w3 holds the symmetric part before it is repositioned in w2.
         */
        ae_vector_set_length(&w1, n+1, _state);
        ae_vector_set_length(&w2, n+1, _state);
        ae_vector_set_length(&w3, n+1, _state);
        ae_matrix_set_length(r, n-1+1, n-1+1, _state);
        for(j=1; j<=n; j++)
        {

            /*
             * Form w2 = A * u'(j) (here u'(j) is j-th column of U')
             */
            ae_v_move(&w1.ptr.p_double[1], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(1,n-j+1));
            symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, &w1, 1.0, &w3, _state);
            ae_v_move(&w2.ptr.p_double[j], 1, &w3.ptr.p_double[1], 1, ae_v_len(j,n));
            ae_v_move(&w1.ptr.p_double[j], 1, &t.ptr.pp_double[j-1][j-1], 1, ae_v_len(j,n));
            if( isuppera )
            {
                matrixvectormultiply(a, 0, j-2, j-1, n-1, ae_false, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state);
            }
            else
            {
                matrixvectormultiply(a, j-1, n-1, 0, j-2, ae_true, &w1, j, n, 1.0, &w2, 1, j-1, 0.0, _state);
            }

            /*
             * Form u(i)*w2 (here u(i) is i-th row of U)
             */
            for(i=1; i<=n; i++)
            {
                v = ae_v_dotproduct(&t.ptr.pp_double[i-1][i-1], 1, &w2.ptr.p_double[i], 1, ae_v_len(i-1,n-1));
                r->ptr.pp_double[i-1][j-1] = v;
            }
        }

        /*
         * Copy R to A
         */
        for(i=0; i<=n-1; i++)
        {
            ae_v_move(&a->ptr.pp_double[i][0], 1, &r->ptr.pp_double[i][0], 1, ae_v_len(0,n-1));
        }
        if( problemtype==2 )
        {

            /*
             * Invert U in T
             */
            rmatrixtrinverse(&t, n, ae_true, ae_false, &info, &rep, _state);
            if( info<=0 )
            {
                result = ae_false;
                ae_frame_leave(_state);
                return result;
            }

            /*
             * Copy U^-1 from T to R
             */
            *isupperr = ae_true;
            for(i=0; i<=n-1; i++)
            {
                for(j=0; j<=i-1; j++)
                {
                    r->ptr.pp_double[i][j] = (double)(0);
                }
            }
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&r->ptr.pp_double[i][i], 1, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
        else
        {

            /*
             * Copy U from T to R and transpose
             */
            *isupperr = ae_false;
            for(i=0; i<=n-1; i++)
            {
                for(j=i+1; j<=n-1; j++)
                {
                    r->ptr.pp_double[i][j] = (double)(0);
                }
            }
            /* Transposing copy: column i of R (r->stride) receives row i of T. */
            for(i=0; i<=n-1; i++)
            {
                ae_v_move(&r->ptr.pp_double[i][i], r->stride, &t.ptr.pp_double[i][i], 1, ae_v_len(i,n-1));
            }
        }
    }
    ae_frame_leave(_state);
    return result;
}
61894 
61895 
61896 #endif
61897 #if defined(AE_COMPILE_MATDET) || !defined(AE_PARTIAL_BUILD)
61898 
61899 
61900 /*************************************************************************
61901 Determinant calculation of the matrix given by its LU decomposition.
61902 
61903 Input parameters:
61904     A       -   LU decomposition of the matrix (output of
61905                 RMatrixLU subroutine).
61906     Pivots  -   table of permutations which were made during
61907                 the LU decomposition.
61908                 Output of RMatrixLU subroutine.
61909     N       -   (optional) size of matrix A:
61910                 * if given, only principal NxN submatrix is processed and
61911                   overwritten. other elements are unchanged.
61912                 * if not given, automatically determined from matrix size
61913                   (A must be square matrix)
61914 
61915 Result: matrix determinant.
61916 
61917   -- ALGLIB --
61918      Copyright 2005 by Bochkanov Sergey
61919 *************************************************************************/
rmatrixludet(ae_matrix * a,ae_vector * pivots,ae_int_t n,ae_state * _state)61920 double rmatrixludet(/* Real    */ ae_matrix* a,
61921      /* Integer */ ae_vector* pivots,
61922      ae_int_t n,
61923      ae_state *_state)
61924 {
61925     ae_int_t i;
61926     ae_int_t s;
61927     double result;
61928 
61929 
61930     ae_assert(n>=1, "RMatrixLUDet: N<1!", _state);
61931     ae_assert(pivots->cnt>=n, "RMatrixLUDet: Pivots array is too short!", _state);
61932     ae_assert(a->rows>=n, "RMatrixLUDet: rows(A)<N!", _state);
61933     ae_assert(a->cols>=n, "RMatrixLUDet: cols(A)<N!", _state);
61934     ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixLUDet: A contains infinite or NaN values!", _state);
61935     result = (double)(1);
61936     s = 1;
61937     for(i=0; i<=n-1; i++)
61938     {
61939         result = result*a->ptr.pp_double[i][i];
61940         if( pivots->ptr.p_int[i]!=i )
61941         {
61942             s = -s;
61943         }
61944     }
61945     result = result*s;
61946     return result;
61947 }
61948 
61949 
61950 /*************************************************************************
61951 Calculation of the determinant of a general matrix
61952 
61953 Input parameters:
61954     A       -   matrix, array[0..N-1, 0..N-1]
61955     N       -   (optional) size of matrix A:
61956                 * if given, only principal NxN submatrix is processed and
61957                   overwritten. other elements are unchanged.
61958                 * if not given, automatically determined from matrix size
61959                   (A must be square matrix)
61960 
61961 Result: determinant of matrix A.
61962 
61963   -- ALGLIB --
61964      Copyright 2005 by Bochkanov Sergey
61965 *************************************************************************/
rmatrixdet(ae_matrix * a,ae_int_t n,ae_state * _state)61966 double rmatrixdet(/* Real    */ ae_matrix* a,
61967      ae_int_t n,
61968      ae_state *_state)
61969 {
61970     ae_frame _frame_block;
61971     ae_matrix _a;
61972     ae_vector pivots;
61973     double result;
61974 
61975     ae_frame_make(_state, &_frame_block);
61976     memset(&_a, 0, sizeof(_a));
61977     memset(&pivots, 0, sizeof(pivots));
61978     ae_matrix_init_copy(&_a, a, _state, ae_true);
61979     a = &_a;
61980     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
61981 
61982     ae_assert(n>=1, "RMatrixDet: N<1!", _state);
61983     ae_assert(a->rows>=n, "RMatrixDet: rows(A)<N!", _state);
61984     ae_assert(a->cols>=n, "RMatrixDet: cols(A)<N!", _state);
61985     ae_assert(apservisfinitematrix(a, n, n, _state), "RMatrixDet: A contains infinite or NaN values!", _state);
61986     rmatrixlu(a, n, n, &pivots, _state);
61987     result = rmatrixludet(a, &pivots, n, _state);
61988     ae_frame_leave(_state);
61989     return result;
61990 }
61991 
61992 
61993 /*************************************************************************
61994 Determinant calculation of the matrix given by its LU decomposition.
61995 
61996 Input parameters:
61997     A       -   LU decomposition of the matrix (output of
61998                 RMatrixLU subroutine).
61999     Pivots  -   table of permutations which were made during
62000                 the LU decomposition.
62001                 Output of RMatrixLU subroutine.
62002     N       -   (optional) size of matrix A:
62003                 * if given, only principal NxN submatrix is processed and
62004                   overwritten. other elements are unchanged.
62005                 * if not given, automatically determined from matrix size
62006                   (A must be square matrix)
62007 
62008 Result: matrix determinant.
62009 
62010   -- ALGLIB --
62011      Copyright 2005 by Bochkanov Sergey
62012 *************************************************************************/
cmatrixludet(ae_matrix * a,ae_vector * pivots,ae_int_t n,ae_state * _state)62013 ae_complex cmatrixludet(/* Complex */ ae_matrix* a,
62014      /* Integer */ ae_vector* pivots,
62015      ae_int_t n,
62016      ae_state *_state)
62017 {
62018     ae_int_t i;
62019     ae_int_t s;
62020     ae_complex result;
62021 
62022 
62023     ae_assert(n>=1, "CMatrixLUDet: N<1!", _state);
62024     ae_assert(pivots->cnt>=n, "CMatrixLUDet: Pivots array is too short!", _state);
62025     ae_assert(a->rows>=n, "CMatrixLUDet: rows(A)<N!", _state);
62026     ae_assert(a->cols>=n, "CMatrixLUDet: cols(A)<N!", _state);
62027     ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixLUDet: A contains infinite or NaN values!", _state);
62028     result = ae_complex_from_i(1);
62029     s = 1;
62030     for(i=0; i<=n-1; i++)
62031     {
62032         result = ae_c_mul(result,a->ptr.pp_complex[i][i]);
62033         if( pivots->ptr.p_int[i]!=i )
62034         {
62035             s = -s;
62036         }
62037     }
62038     result = ae_c_mul_d(result,(double)(s));
62039     return result;
62040 }
62041 
62042 
62043 /*************************************************************************
62044 Calculation of the determinant of a general matrix
62045 
62046 Input parameters:
62047     A       -   matrix, array[0..N-1, 0..N-1]
62048     N       -   (optional) size of matrix A:
62049                 * if given, only principal NxN submatrix is processed and
62050                   overwritten. other elements are unchanged.
62051                 * if not given, automatically determined from matrix size
62052                   (A must be square matrix)
62053 
62054 Result: determinant of matrix A.
62055 
62056   -- ALGLIB --
62057      Copyright 2005 by Bochkanov Sergey
62058 *************************************************************************/
cmatrixdet(ae_matrix * a,ae_int_t n,ae_state * _state)62059 ae_complex cmatrixdet(/* Complex */ ae_matrix* a,
62060      ae_int_t n,
62061      ae_state *_state)
62062 {
62063     ae_frame _frame_block;
62064     ae_matrix _a;
62065     ae_vector pivots;
62066     ae_complex result;
62067 
62068     ae_frame_make(_state, &_frame_block);
62069     memset(&_a, 0, sizeof(_a));
62070     memset(&pivots, 0, sizeof(pivots));
62071     ae_matrix_init_copy(&_a, a, _state, ae_true);
62072     a = &_a;
62073     ae_vector_init(&pivots, 0, DT_INT, _state, ae_true);
62074 
62075     ae_assert(n>=1, "CMatrixDet: N<1!", _state);
62076     ae_assert(a->rows>=n, "CMatrixDet: rows(A)<N!", _state);
62077     ae_assert(a->cols>=n, "CMatrixDet: cols(A)<N!", _state);
62078     ae_assert(apservisfinitecmatrix(a, n, n, _state), "CMatrixDet: A contains infinite or NaN values!", _state);
62079     cmatrixlu(a, n, n, &pivots, _state);
62080     result = cmatrixludet(a, &pivots, n, _state);
62081     ae_frame_leave(_state);
62082     return result;
62083 }
62084 
62085 
62086 /*************************************************************************
62087 Determinant calculation of the matrix given by the Cholesky decomposition.
62088 
62089 Input parameters:
62090     A       -   Cholesky decomposition,
62091                 output of SMatrixCholesky subroutine.
62092     N       -   (optional) size of matrix A:
62093                 * if given, only principal NxN submatrix is processed and
62094                   overwritten. other elements are unchanged.
62095                 * if not given, automatically determined from matrix size
62096                   (A must be square matrix)
62097 
62098 As the determinant is equal to the product of squares of diagonal elements,
62099 it's not necessary to specify which triangle - lower or upper - the matrix
62100 is stored in.
62101 
62102 Result:
62103     matrix determinant.
62104 
62105   -- ALGLIB --
62106      Copyright 2005-2008 by Bochkanov Sergey
62107 *************************************************************************/
spdmatrixcholeskydet(ae_matrix * a,ae_int_t n,ae_state * _state)62108 double spdmatrixcholeskydet(/* Real    */ ae_matrix* a,
62109      ae_int_t n,
62110      ae_state *_state)
62111 {
62112     ae_int_t i;
62113     ae_bool f;
62114     double result;
62115 
62116 
62117     ae_assert(n>=1, "SPDMatrixCholeskyDet: N<1!", _state);
62118     ae_assert(a->rows>=n, "SPDMatrixCholeskyDet: rows(A)<N!", _state);
62119     ae_assert(a->cols>=n, "SPDMatrixCholeskyDet: cols(A)<N!", _state);
62120     f = ae_true;
62121     for(i=0; i<=n-1; i++)
62122     {
62123         f = f&&ae_isfinite(a->ptr.pp_double[i][i], _state);
62124     }
62125     ae_assert(f, "SPDMatrixCholeskyDet: A contains infinite or NaN values!", _state);
62126     result = (double)(1);
62127     for(i=0; i<=n-1; i++)
62128     {
62129         result = result*ae_sqr(a->ptr.pp_double[i][i], _state);
62130     }
62131     return result;
62132 }
62133 
62134 
62135 /*************************************************************************
62136 Determinant calculation of the symmetric positive definite matrix.
62137 
62138 Input parameters:
62139     A       -   matrix. Array with elements [0..N-1, 0..N-1].
62140     N       -   (optional) size of matrix A:
62141                 * if given, only principal NxN submatrix is processed and
62142                   overwritten. other elements are unchanged.
62143                 * if not given, automatically determined from matrix size
62144                   (A must be square matrix)
62145     IsUpper -   (optional) storage type:
62146                 * if True, symmetric matrix  A  is  given  by  its  upper
62147                   triangle, and the lower triangle isn't used/changed  by
62148                   function
62149                 * if False, symmetric matrix  A  is  given  by  its lower
62150                   triangle, and the upper triangle isn't used/changed  by
62151                   function
62152                 * if not given, both lower and upper  triangles  must  be
62153                   filled.
62154 
62155 Result:
62156     determinant of matrix A.
62157     If matrix A is not positive definite, exception is thrown.
62158 
62159   -- ALGLIB --
62160      Copyright 2005-2008 by Bochkanov Sergey
62161 *************************************************************************/
spdmatrixdet(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_state * _state)62162 double spdmatrixdet(/* Real    */ ae_matrix* a,
62163      ae_int_t n,
62164      ae_bool isupper,
62165      ae_state *_state)
62166 {
62167     ae_frame _frame_block;
62168     ae_matrix _a;
62169     ae_bool b;
62170     double result;
62171 
62172     ae_frame_make(_state, &_frame_block);
62173     memset(&_a, 0, sizeof(_a));
62174     ae_matrix_init_copy(&_a, a, _state, ae_true);
62175     a = &_a;
62176 
62177     ae_assert(n>=1, "SPDMatrixDet: N<1!", _state);
62178     ae_assert(a->rows>=n, "SPDMatrixDet: rows(A)<N!", _state);
62179     ae_assert(a->cols>=n, "SPDMatrixDet: cols(A)<N!", _state);
62180     ae_assert(isfinitertrmatrix(a, n, isupper, _state), "SPDMatrixDet: A contains infinite or NaN values!", _state);
62181     b = spdmatrixcholesky(a, n, isupper, _state);
62182     ae_assert(b, "SPDMatrixDet: A is not SPD!", _state);
62183     result = spdmatrixcholeskydet(a, n, _state);
62184     ae_frame_leave(_state);
62185     return result;
62186 }
62187 
62188 
62189 #endif
62190 
62191 }
62192 
62193