1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17
18
19 //! \addtogroup auxlib
20 //! @{
21
22
23
24 template<typename eT>
25 inline
26 bool
inv(Mat<eT> & A)27 auxlib::inv(Mat<eT>& A)
28 {
29 arma_extra_debug_sigprint();
30
31 if(A.is_empty()) { return true; }
32
33 #if defined(ARMA_USE_ATLAS)
34 {
35 arma_debug_assert_atlas_size(A);
36
37 podarray<int> ipiv(A.n_rows);
38
39 int info = 0;
40
41 arma_extra_debug_print("atlas::clapack_getrf()");
42 info = atlas::clapack_getrf(atlas::CblasColMajor, A.n_rows, A.n_cols, A.memptr(), A.n_rows, ipiv.memptr());
43
44 if(info != 0) { return false; }
45
46 arma_extra_debug_print("atlas::clapack_getri()");
47 info = atlas::clapack_getri(atlas::CblasColMajor, A.n_rows, A.memptr(), A.n_rows, ipiv.memptr());
48
49 return (info == 0);
50 }
51 #elif defined(ARMA_USE_LAPACK)
52 {
53 arma_debug_assert_blas_size(A);
54
55 blas_int n = blas_int(A.n_rows);
56 blas_int lda = blas_int(A.n_rows);
57 blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n);
58 blas_int info = 0;
59
60 podarray<blas_int> ipiv(A.n_rows);
61
62 if(n > 16)
63 {
64 eT work_query[2];
65 blas_int lwork_query = -1;
66
67 arma_extra_debug_print("lapack::getri()");
68 lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info);
69
70 if(info != 0) { return false; }
71
72 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
73
74 lwork = (std::max)(lwork_proposed, lwork);
75 }
76
77 podarray<eT> work( static_cast<uword>(lwork) );
78
79 arma_extra_debug_print("lapack::getrf()");
80 lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info);
81
82 if(info != 0) { return false; }
83
84 arma_extra_debug_print("lapack::getri()");
85 lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info);
86
87 return (info == 0);
88 }
89 #else
90 {
91 arma_ignore(A);
92 arma_stop_logic_error("inv(): use of ATLAS or LAPACK must be enabled");
93 return false;
94 }
95 #endif
96 }
97
98
99
100 template<typename eT>
101 inline
102 bool
inv(Mat<eT> & out,const Mat<eT> & X)103 auxlib::inv(Mat<eT>& out, const Mat<eT>& X)
104 {
105 arma_extra_debug_sigprint();
106
107 out = X;
108
109 return auxlib::inv(out);
110 }
111
112
113
114 template<typename eT>
115 inline
116 bool
inv_tr(Mat<eT> & A,const uword layout)117 auxlib::inv_tr(Mat<eT>& A, const uword layout)
118 {
119 arma_extra_debug_sigprint();
120
121 #if defined(ARMA_USE_LAPACK)
122 {
123 if(A.is_empty()) { return true; }
124
125 arma_debug_assert_blas_size(A);
126
127 char uplo = (layout == 0) ? 'U' : 'L';
128 char diag = 'N';
129 blas_int n = blas_int(A.n_rows);
130 blas_int info = 0;
131
132 arma_extra_debug_print("lapack::trtri()");
133 lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info);
134
135 if(info != 0) { return false; }
136
137 if(layout == 0)
138 {
139 A = trimatu(A); // upper triangular
140 }
141 else
142 {
143 A = trimatl(A); // lower triangular
144 }
145
146 return true;
147 }
148 #else
149 {
150 arma_ignore(A);
151 arma_ignore(layout);
152 arma_stop_logic_error("inv(): use of LAPACK must be enabled");
153 return false;
154 }
155 #endif
156 }
157
158
159
160 template<typename eT>
161 inline
162 bool
inv_sympd(Mat<eT> & A)163 auxlib::inv_sympd(Mat<eT>& A)
164 {
165 arma_extra_debug_sigprint();
166
167 if(A.is_empty()) { return true; }
168
169 #if defined(ARMA_USE_ATLAS)
170 {
171 arma_debug_assert_atlas_size(A);
172
173 int info = 0;
174
175 arma_extra_debug_print("atlas::clapack_potrf()");
176 info = atlas::clapack_potrf(atlas::CblasColMajor, atlas::CblasLower, A.n_rows, A.memptr(), A.n_rows);
177
178 if(info != 0) { return false; }
179
180 arma_extra_debug_print("atlas::clapack_potri()");
181 info = atlas::clapack_potri(atlas::CblasColMajor, atlas::CblasLower, A.n_rows, A.memptr(), A.n_rows);
182
183 if(info != 0) { return false; }
184
185 A = symmatl(A);
186
187 return true;
188 }
189 #elif defined(ARMA_USE_LAPACK)
190 {
191 arma_debug_assert_blas_size(A);
192
193 char uplo = 'L';
194 blas_int n = blas_int(A.n_rows);
195 blas_int info = 0;
196
197 // NOTE: for complex matrices, zpotrf() assumes the matrix is hermitian (not simply symmetric)
198
199 arma_extra_debug_print("lapack::potrf()");
200 lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
201
202 if(info != 0) { return false; }
203
204 arma_extra_debug_print("lapack::potri()");
205 lapack::potri(&uplo, &n, A.memptr(), &n, &info);
206
207 if(info != 0) { return false; }
208
209 A = symmatl(A);
210
211 return true;
212 }
213 #else
214 {
215 arma_ignore(A);
216 arma_stop_logic_error("inv_sympd(): use of ATLAS or LAPACK must be enabled");
217 return false;
218 }
219 #endif
220 }
221
222
223
224 template<typename eT>
225 inline
226 bool
inv_sympd(Mat<eT> & out,const Mat<eT> & X)227 auxlib::inv_sympd(Mat<eT>& out, const Mat<eT>& X)
228 {
229 arma_extra_debug_sigprint();
230
231 out = X;
232
233 return auxlib::inv_sympd(out);
234 }
235
236
237
238 template<typename eT>
239 inline
240 bool
inv_sympd_rcond(Mat<eT> & A,const eT rcond_threshold)241 auxlib::inv_sympd_rcond(Mat<eT>& A, const eT rcond_threshold)
242 {
243 arma_extra_debug_sigprint();
244
245 if(A.is_empty()) { return true; }
246
247 #if defined(ARMA_USE_LAPACK)
248 {
249 typedef typename get_pod_type<eT>::result T;
250
251 arma_debug_assert_blas_size(A);
252
253 char norm_id = '1';
254 char uplo = 'L';
255 blas_int n = blas_int(A.n_rows);
256 blas_int info = 0;
257 T norm_val = T(0);
258
259 podarray<T> work(A.n_rows);
260
261 arma_extra_debug_print("lapack::lansy()");
262 norm_val = lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
263
264 arma_extra_debug_print("lapack::potrf()");
265 lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
266
267 if(info != 0) { return false; }
268
269 const T rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
270
271 if(rcond < rcond_threshold) { return false; }
272
273 arma_extra_debug_print("lapack::potri()");
274 lapack::potri(&uplo, &n, A.memptr(), &n, &info);
275
276 if(info != 0) { return false; }
277
278 A = symmatl(A);
279
280 return true;
281 }
282 #else
283 {
284 arma_ignore(A);
285 arma_ignore(rcond_threshold);
286 arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled");
287 return false;
288 }
289 #endif
290 }
291
292
293
294 template<typename T>
295 inline
296 bool
inv_sympd_rcond(Mat<std::complex<T>> & A,const T rcond_threshold)297 auxlib::inv_sympd_rcond(Mat< std::complex<T> >& A, const T rcond_threshold)
298 {
299 arma_extra_debug_sigprint();
300
301 if(A.is_empty()) { return true; }
302
303 #if defined(ARMA_CRIPPLED_LAPACK)
304 {
305 arma_ignore(A);
306 arma_ignore(rcond_threshold);
307 return false;
308 }
309 #elif defined(ARMA_USE_LAPACK)
310 {
311 arma_debug_assert_blas_size(A);
312
313 char norm_id = '1';
314 char uplo = 'L';
315 blas_int n = blas_int(A.n_rows);
316 blas_int info = 0;
317 T norm_val = T(0);
318
319 podarray<T> work(A.n_rows);
320
321 arma_extra_debug_print("lapack::lanhe()");
322 norm_val = lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
323
324 arma_extra_debug_print("lapack::potrf()");
325 lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
326
327 if(info != 0) { return false; }
328
329 const T rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
330
331 if(rcond < rcond_threshold) { return false; }
332
333 arma_extra_debug_print("lapack::potri()");
334 lapack::potri(&uplo, &n, A.memptr(), &n, &info);
335
336 if(info != 0) { return false; }
337
338 A = symmatl(A);
339
340 return true;
341 }
342 #else
343 {
344 arma_ignore(A);
345 arma_ignore(rcond_threshold);
346 arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled");
347 return false;
348 }
349 #endif
350 }
351
352
353
354 //! determinant of a matrix
355 template<typename eT>
356 inline
357 bool
det(eT & out_val,Mat<eT> & A)358 auxlib::det(eT& out_val, Mat<eT>& A)
359 {
360 arma_extra_debug_sigprint();
361
362 if(A.is_empty()) { out_val = eT(1); return true; }
363
364 #if defined(ARMA_USE_ATLAS)
365 {
366 arma_debug_assert_atlas_size(A);
367
368 podarray<int> ipiv(A.n_rows);
369
370 arma_extra_debug_print("atlas::clapack_getrf()");
371 const int info = atlas::clapack_getrf(atlas::CblasColMajor, A.n_rows, A.n_cols, A.memptr(), A.n_rows, ipiv.memptr());
372
373 if(info < 0) { return false; }
374
375 // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
376 eT val = A.at(0,0);
377 for(uword i=1; i < A.n_rows; ++i) { val *= A.at(i,i); }
378
379 int sign = +1;
380 for(uword i=0; i < A.n_rows; ++i)
381 {
382 // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0
383 if( int(i) != ipiv.mem[i] ) { sign *= -1; }
384 }
385
386 out_val = (sign < 0) ? eT(-val) : eT(val);
387
388 return true;
389 }
390 #elif defined(ARMA_USE_LAPACK)
391 {
392 arma_debug_assert_blas_size(A);
393
394 podarray<blas_int> ipiv(A.n_rows);
395
396 blas_int info = 0;
397 blas_int n_rows = blas_int(A.n_rows);
398 blas_int n_cols = blas_int(A.n_cols);
399
400 arma_extra_debug_print("lapack::getrf()");
401 lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info);
402
403 if(info < 0) { return false; }
404
405 // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
406 eT val = A.at(0,0);
407 for(uword i=1; i < A.n_rows; ++i) { val *= A.at(i,i); }
408
409 blas_int sign = +1;
410 for(uword i=0; i < A.n_rows; ++i)
411 {
412 // NOTE: adjustment of -1 is required as Fortran counts from 1
413 if( blas_int(i) != (ipiv.mem[i] - 1) ) { sign *= -1; }
414 }
415
416 out_val = (sign < 0) ? eT(-val) : eT(val);
417
418 return true;
419 }
420 #else
421 {
422 arma_ignore(out_val);
423 arma_ignore(A);
424 arma_stop_logic_error("det(): use of ATLAS or LAPACK must be enabled");
425 return false;
426 }
427 #endif
428 }
429
430
431
432 //! log determinant of a matrix
433 template<typename eT>
434 inline
435 bool
log_det(eT & out_val,typename get_pod_type<eT>::result & out_sign,Mat<eT> & A)436 auxlib::log_det(eT& out_val, typename get_pod_type<eT>::result& out_sign, Mat<eT>& A)
437 {
438 arma_extra_debug_sigprint();
439
440 typedef typename get_pod_type<eT>::result T;
441
442 if(A.is_empty())
443 {
444 out_val = eT(0);
445 out_sign = T(1);
446 return true;
447 }
448
449 #if defined(ARMA_USE_ATLAS)
450 {
451 arma_debug_assert_atlas_size(A);
452
453 podarray<int> ipiv(A.n_rows);
454
455 arma_extra_debug_print("atlas::clapack_getrf()");
456 const int info = atlas::clapack_getrf(atlas::CblasColMajor, A.n_rows, A.n_cols, A.memptr(), A.n_rows, ipiv.memptr());
457
458 if(info < 0) { return false; }
459
460 // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
461
462 sword sign = (is_cx<eT>::no) ? ( (access::tmp_real( A.at(0,0) ) < T(0)) ? -1 : +1 ) : +1;
463 eT val = (is_cx<eT>::no) ? std::log( (access::tmp_real( A.at(0,0) ) < T(0)) ? A.at(0,0)*T(-1) : A.at(0,0) ) : std::log( A.at(0,0) );
464
465 for(uword i=1; i < A.n_rows; ++i)
466 {
467 const eT x = A.at(i,i);
468
469 sign *= (is_cx<eT>::no) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1;
470 val += (is_cx<eT>::no) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x);
471 }
472
473 for(uword i=0; i < A.n_rows; ++i)
474 {
475 if( int(i) != ipiv.mem[i] ) // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0
476 {
477 sign *= -1;
478 }
479 }
480
481 out_val = val;
482 out_sign = T(sign);
483
484 return true;
485 }
486 #elif defined(ARMA_USE_LAPACK)
487 {
488 arma_debug_assert_blas_size(A);
489
490 podarray<blas_int> ipiv(A.n_rows);
491
492 blas_int info = 0;
493 blas_int n_rows = blas_int(A.n_rows);
494 blas_int n_cols = blas_int(A.n_cols);
495
496 arma_extra_debug_print("lapack::getrf()");
497 lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info);
498
499 if(info < 0) { return false; }
500
501 // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
502
503 sword sign = (is_cx<eT>::no) ? ( (access::tmp_real( A.at(0,0) ) < T(0)) ? -1 : +1 ) : +1;
504 eT val = (is_cx<eT>::no) ? std::log( (access::tmp_real( A.at(0,0) ) < T(0)) ? A.at(0,0)*T(-1) : A.at(0,0) ) : std::log( A.at(0,0) );
505
506 for(uword i=1; i < A.n_rows; ++i)
507 {
508 const eT x = A.at(i,i);
509
510 sign *= (is_cx<eT>::no) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1;
511 val += (is_cx<eT>::no) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x);
512 }
513
514 for(uword i=0; i < A.n_rows; ++i)
515 {
516 if( blas_int(i) != (ipiv.mem[i] - 1) ) // NOTE: adjustment of -1 is required as Fortran counts from 1
517 {
518 sign *= -1;
519 }
520 }
521
522 out_val = val;
523 out_sign = T(sign);
524
525 return true;
526 }
527 #else
528 {
529 arma_ignore(A);
530 arma_ignore(out_val);
531 arma_ignore(out_sign);
532 arma_stop_logic_error("log_det(): use of ATLAS or LAPACK must be enabled");
533 return false;
534 }
535 #endif
536 }
537
538
539
540 template<typename eT>
541 inline
542 bool
log_det_sympd(typename get_pod_type<eT>::result & out_val,Mat<eT> & A)543 auxlib::log_det_sympd(typename get_pod_type<eT>::result& out_val, Mat<eT>& A)
544 {
545 arma_extra_debug_sigprint();
546
547 typedef typename get_pod_type<eT>::result T;
548
549 if(A.is_empty()) { out_val = T(0); return true; }
550
551 #if defined(ARMA_USE_ATLAS)
552 {
553 arma_debug_assert_atlas_size(A);
554
555 int info = 0;
556
557 arma_extra_debug_print("atlas::clapack_potrf()");
558 info = atlas::clapack_potrf(atlas::CblasColMajor, atlas::CblasLower, A.n_rows, A.memptr(), A.n_rows);
559
560 if(info != 0) { return false; }
561
562 T val = std::log( access::tmp_real(A.at(0,0)) );
563
564 for(uword i=1; i < A.n_rows; ++i) { val += std::log( access::tmp_real(A.at(i,i)) ); }
565
566 out_val = T(2) * val;
567
568 return true;
569 }
570 #elif defined(ARMA_USE_LAPACK)
571 {
572 arma_debug_assert_blas_size(A);
573
574 char uplo = 'L';
575 blas_int n = blas_int(A.n_rows);
576 blas_int info = 0;
577
578 arma_extra_debug_print("lapack::potrf()");
579 lapack::potrf(&uplo, &n, A.memptr(), &n, &info);
580
581 if(info != 0) { return false; }
582
583 T val = std::log( access::tmp_real(A.at(0,0)) );
584
585 for(uword i=1; i < A.n_rows; ++i) { val += std::log( access::tmp_real(A.at(i,i)) ); }
586
587 out_val = T(2) * val;
588
589 return true;
590 }
591 #else
592 {
593 arma_ignore(out_val);
594 arma_ignore(A);
595 arma_stop_logic_error("det(): use of ATLAS or LAPACK must be enabled");
596 return false;
597 }
598 #endif
599 }
600
601
602
603 //! LU decomposition of a matrix
604 template<typename eT, typename T1>
605 inline
606 bool
lu(Mat<eT> & L,Mat<eT> & U,podarray<blas_int> & ipiv,const Base<eT,T1> & X)607 auxlib::lu(Mat<eT>& L, Mat<eT>& U, podarray<blas_int>& ipiv, const Base<eT,T1>& X)
608 {
609 arma_extra_debug_sigprint();
610
611 U = X.get_ref();
612
613 const uword U_n_rows = U.n_rows;
614 const uword U_n_cols = U.n_cols;
615
616 if(U.is_empty())
617 {
618 L.set_size(U_n_rows, 0);
619 U.set_size(0, U_n_cols);
620 ipiv.reset();
621 return true;
622 }
623
624 #if defined(ARMA_USE_ATLAS) || defined(ARMA_USE_LAPACK)
625 {
626 #if defined(ARMA_USE_ATLAS)
627 {
628 arma_debug_assert_atlas_size(U);
629
630 ipiv.set_size( (std::min)(U_n_rows, U_n_cols) );
631
632 arma_extra_debug_print("atlas::clapack_getrf()");
633 int info = atlas::clapack_getrf(atlas::CblasColMajor, U_n_rows, U_n_cols, U.memptr(), U_n_rows, ipiv.memptr());
634
635 if(info < 0) { return false; }
636 }
637 #elif defined(ARMA_USE_LAPACK)
638 {
639 arma_debug_assert_blas_size(U);
640
641 ipiv.set_size( (std::min)(U_n_rows, U_n_cols) );
642
643 blas_int info = 0;
644
645 blas_int n_rows = blas_int(U_n_rows);
646 blas_int n_cols = blas_int(U_n_cols);
647
648 arma_extra_debug_print("lapack::getrf()");
649 lapack::getrf(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info);
650
651 if(info < 0) { return false; }
652
653 // take into account that Fortran counts from 1
654 arrayops::inplace_minus(ipiv.memptr(), blas_int(1), ipiv.n_elem);
655 }
656 #endif
657
658 L.copy_size(U);
659
660 for(uword col=0; col < U_n_cols; ++col)
661 {
662 for(uword row=0; (row < col) && (row < U_n_rows); ++row)
663 {
664 L.at(row,col) = eT(0);
665 }
666
667 if( L.in_range(col,col) )
668 {
669 L.at(col,col) = eT(1);
670 }
671
672 for(uword row = (col+1); row < U_n_rows; ++row)
673 {
674 L.at(row,col) = U.at(row,col);
675 U.at(row,col) = eT(0);
676 }
677 }
678
679 return true;
680 }
681 #else
682 {
683 arma_stop_logic_error("lu(): use of ATLAS or LAPACK must be enabled");
684 return false;
685 }
686 #endif
687 }
688
689
690
691 template<typename eT, typename T1>
692 inline
693 bool
lu(Mat<eT> & L,Mat<eT> & U,Mat<eT> & P,const Base<eT,T1> & X)694 auxlib::lu(Mat<eT>& L, Mat<eT>& U, Mat<eT>& P, const Base<eT,T1>& X)
695 {
696 arma_extra_debug_sigprint();
697
698 podarray<blas_int> ipiv1;
699 const bool status = auxlib::lu(L, U, ipiv1, X);
700
701 if(status == false) { return false; }
702
703 if(U.is_empty())
704 {
705 // L and U have been already set to the correct empty matrices
706 P.eye(L.n_rows, L.n_rows);
707 return true;
708 }
709
710 const uword n = ipiv1.n_elem;
711 const uword P_rows = U.n_rows;
712
713 podarray<blas_int> ipiv2(P_rows);
714
715 const blas_int* ipiv1_mem = ipiv1.memptr();
716 blas_int* ipiv2_mem = ipiv2.memptr();
717
718 for(uword i=0; i<P_rows; ++i)
719 {
720 ipiv2_mem[i] = blas_int(i);
721 }
722
723 for(uword i=0; i<n; ++i)
724 {
725 const uword k = static_cast<uword>(ipiv1_mem[i]);
726
727 if( ipiv2_mem[i] != ipiv2_mem[k] )
728 {
729 std::swap( ipiv2_mem[i], ipiv2_mem[k] );
730 }
731 }
732
733 P.zeros(P_rows, P_rows);
734
735 for(uword row=0; row<P_rows; ++row)
736 {
737 P.at(row, static_cast<uword>(ipiv2_mem[row])) = eT(1);
738 }
739
740 if(L.n_cols > U.n_rows)
741 {
742 L.shed_cols(U.n_rows, L.n_cols-1);
743 }
744
745 if(U.n_rows > L.n_cols)
746 {
747 U.shed_rows(L.n_cols, U.n_rows-1);
748 }
749
750 return true;
751 }
752
753
754
755 template<typename eT, typename T1>
756 inline
757 bool
lu(Mat<eT> & L,Mat<eT> & U,const Base<eT,T1> & X)758 auxlib::lu(Mat<eT>& L, Mat<eT>& U, const Base<eT,T1>& X)
759 {
760 arma_extra_debug_sigprint();
761
762 podarray<blas_int> ipiv1;
763 const bool status = auxlib::lu(L, U, ipiv1, X);
764
765 if(status == false) { return false; }
766
767 if(U.is_empty())
768 {
769 // L and U have been already set to the correct empty matrices
770 return true;
771 }
772
773 const uword n = ipiv1.n_elem;
774 const uword P_rows = U.n_rows;
775
776 podarray<blas_int> ipiv2(P_rows);
777
778 const blas_int* ipiv1_mem = ipiv1.memptr();
779 blas_int* ipiv2_mem = ipiv2.memptr();
780
781 for(uword i=0; i<P_rows; ++i)
782 {
783 ipiv2_mem[i] = blas_int(i);
784 }
785
786 for(uword i=0; i<n; ++i)
787 {
788 const uword k = static_cast<uword>(ipiv1_mem[i]);
789
790 if( ipiv2_mem[i] != ipiv2_mem[k] )
791 {
792 std::swap( ipiv2_mem[i], ipiv2_mem[k] );
793 L.swap_rows( static_cast<uword>(ipiv2_mem[i]), static_cast<uword>(ipiv2_mem[k]) );
794 }
795 }
796
797 if(L.n_cols > U.n_rows)
798 {
799 L.shed_cols(U.n_rows, L.n_cols-1);
800 }
801
802 if(U.n_rows > L.n_cols)
803 {
804 U.shed_rows(L.n_cols, U.n_rows-1);
805 }
806
807 return true;
808 }
809
810
811
812 //! eigen decomposition of general square matrix (real)
813 template<typename T1>
814 inline
815 bool
eig_gen(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & vecs,const bool vecs_on,const Base<typename T1::pod_type,T1> & expr)816 auxlib::eig_gen
817 (
818 Mat< std::complex<typename T1::pod_type> >& vals,
819 Mat< std::complex<typename T1::pod_type> >& vecs,
820 const bool vecs_on,
821 const Base<typename T1::pod_type,T1>& expr
822 )
823 {
824 arma_extra_debug_sigprint();
825
826 #if defined(ARMA_USE_LAPACK)
827 {
828 typedef typename T1::pod_type T;
829
830 Mat<T> X = expr.get_ref();
831
832 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
833
834 arma_debug_assert_blas_size(X);
835
836 if(X.is_empty())
837 {
838 vals.reset();
839 vecs.reset();
840 return true;
841 }
842
843 if(X.is_finite() == false) { return false; }
844
845 vals.set_size(X.n_rows, 1);
846
847 Mat<T> tmp(1, 1, arma_nozeros_indicator());
848
849 if(vecs_on)
850 {
851 vecs.set_size(X.n_rows, X.n_rows);
852 tmp.set_size(X.n_rows, X.n_rows);
853 }
854
855 podarray<T> junk(1);
856
857 char jobvl = 'N';
858 char jobvr = (vecs_on) ? 'V' : 'N';
859 blas_int N = blas_int(X.n_rows);
860 T* vl = junk.memptr();
861 T* vr = (vecs_on) ? tmp.memptr() : junk.memptr();
862 blas_int ldvl = blas_int(1);
863 blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1);
864 blas_int lwork = 64*N; // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 4*N) : (std::max)(blas_int(1), 3*N)
865 blas_int info = 0;
866
867 podarray<T> work( static_cast<uword>(lwork) );
868
869 podarray<T> vals_real(X.n_rows);
870 podarray<T> vals_imag(X.n_rows);
871
872 arma_extra_debug_print("lapack::geev() -- START");
873 lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info);
874 arma_extra_debug_print("lapack::geev() -- END");
875
876 if(info != 0) { return false; }
877
878 arma_extra_debug_print("reformatting eigenvalues and eigenvectors");
879
880 std::complex<T>* vals_mem = vals.memptr();
881
882 for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
883
884 if(vecs_on)
885 {
886 for(uword j=0; j < X.n_rows; ++j)
887 {
888 if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
889 {
890 for(uword i=0; i < X.n_rows; ++i)
891 {
892 vecs.at(i,j) = std::complex<T>( tmp.at(i,j), tmp.at(i,j+1) );
893 vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) );
894 }
895
896 ++j;
897 }
898 else
899 {
900 for(uword i=0; i<X.n_rows; ++i)
901 {
902 vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0));
903 }
904 }
905 }
906 }
907
908 return true;
909 }
910 #else
911 {
912 arma_ignore(vals);
913 arma_ignore(vecs);
914 arma_ignore(vecs_on);
915 arma_ignore(expr);
916 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
917 return false;
918 }
919 #endif
920 }
921
922
923
924 //! eigen decomposition of general square matrix (complex)
925 template<typename T1>
926 inline
927 bool
eig_gen(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & vecs,const bool vecs_on,const Base<std::complex<typename T1::pod_type>,T1> & expr)928 auxlib::eig_gen
929 (
930 Mat< std::complex<typename T1::pod_type> >& vals,
931 Mat< std::complex<typename T1::pod_type> >& vecs,
932 const bool vecs_on,
933 const Base< std::complex<typename T1::pod_type>, T1 >& expr
934 )
935 {
936 arma_extra_debug_sigprint();
937
938 #if defined(ARMA_USE_LAPACK)
939 {
940 typedef typename T1::pod_type T;
941 typedef typename std::complex<T> eT;
942
943 Mat<eT> X = expr.get_ref();
944
945 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
946
947 arma_debug_assert_blas_size(X);
948
949 if(X.is_empty())
950 {
951 vals.reset();
952 vecs.reset();
953 return true;
954 }
955
956 if(X.is_finite() == false) { return false; }
957
958 vals.set_size(X.n_rows, 1);
959
960 if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); }
961
962 podarray<eT> junk(1);
963
964 char jobvl = 'N';
965 char jobvr = (vecs_on) ? 'V' : 'N';
966 blas_int N = blas_int(X.n_rows);
967 eT* vl = junk.memptr();
968 eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr();
969 blas_int ldvl = blas_int(1);
970 blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1);
971 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 2*N)
972 blas_int info = 0;
973
974 podarray<eT> work( static_cast<uword>(lwork) );
975 podarray< T> rwork( static_cast<uword>(2*N) );
976
977 arma_extra_debug_print("lapack::cx_geev() -- START");
978 lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
979 arma_extra_debug_print("lapack::cx_geev() -- END");
980
981 return (info == 0);
982 }
983 #else
984 {
985 arma_ignore(vals);
986 arma_ignore(vecs);
987 arma_ignore(vecs_on);
988 arma_ignore(expr);
989 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
990 return false;
991 }
992 #endif
993 }
994
995
996
997 //! eigen decomposition of general square matrix (real, balance given matrix)
998 template<typename T1>
999 inline
1000 bool
eig_gen_balance(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & vecs,const bool vecs_on,const Base<typename T1::pod_type,T1> & expr)1001 auxlib::eig_gen_balance
1002 (
1003 Mat< std::complex<typename T1::pod_type> >& vals,
1004 Mat< std::complex<typename T1::pod_type> >& vecs,
1005 const bool vecs_on,
1006 const Base<typename T1::pod_type,T1>& expr
1007 )
1008 {
1009 arma_extra_debug_sigprint();
1010
1011 #if defined(ARMA_USE_LAPACK)
1012 {
1013 typedef typename T1::pod_type T;
1014
1015 Mat<T> X = expr.get_ref();
1016
1017 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
1018
1019 arma_debug_assert_blas_size(X);
1020
1021 if(X.is_empty())
1022 {
1023 vals.reset();
1024 vecs.reset();
1025 return true;
1026 }
1027
1028 if(X.is_finite() == false) { return false; }
1029
1030 vals.set_size(X.n_rows, 1);
1031
1032 Mat<T> tmp(1, 1, arma_nozeros_indicator());
1033
1034 if(vecs_on)
1035 {
1036 vecs.set_size(X.n_rows, X.n_rows);
1037 tmp.set_size(X.n_rows, X.n_rows);
1038 }
1039
1040 podarray<T> junk(1);
1041
1042 char bal = 'B';
1043 char jobvl = 'N';
1044 char jobvr = (vecs_on) ? 'V' : 'N';
1045 char sense = 'N';
1046 blas_int N = blas_int(X.n_rows);
1047 T* vl = junk.memptr();
1048 T* vr = (vecs_on) ? tmp.memptr() : junk.memptr();
1049 blas_int ldvl = blas_int(1);
1050 blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1);
1051 blas_int ilo = blas_int(0);
1052 blas_int ihi = blas_int(0);
1053 T abnrm = T(0);
1054 blas_int lwork = 64*N; // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 2*N) : (std::max)(blas_int(1), 3*N)
1055 blas_int info = blas_int(0);
1056
1057 podarray<T> scale(X.n_rows);
1058 podarray<T> rconde(X.n_rows);
1059 podarray<T> rcondv(X.n_rows);
1060
1061 podarray<T> work( static_cast<uword>(lwork) );
1062 podarray<blas_int> iwork( uword(1) ); // iwork not used by lapack::geevx() as sense = 'N'
1063
1064 podarray<T> vals_real(X.n_rows);
1065 podarray<T> vals_imag(X.n_rows);
1066
1067 arma_extra_debug_print("lapack::geevx() -- START");
1068 lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info);
1069 arma_extra_debug_print("lapack::geevx() -- END");
1070
1071 if(info != 0) { return false; }
1072
1073 arma_extra_debug_print("reformatting eigenvalues and eigenvectors");
1074
1075 std::complex<T>* vals_mem = vals.memptr();
1076
1077 for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
1078
1079 if(vecs_on)
1080 {
1081 for(uword j=0; j < X.n_rows; ++j)
1082 {
1083 if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
1084 {
1085 for(uword i=0; i < X.n_rows; ++i)
1086 {
1087 vecs.at(i,j) = std::complex<T>( tmp.at(i,j), tmp.at(i,j+1) );
1088 vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) );
1089 }
1090
1091 ++j;
1092 }
1093 else
1094 {
1095 for(uword i=0; i<X.n_rows; ++i)
1096 {
1097 vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0));
1098 }
1099 }
1100 }
1101 }
1102
1103 return true;
1104 }
1105 #else
1106 {
1107 arma_ignore(vals);
1108 arma_ignore(vecs);
1109 arma_ignore(vecs_on);
1110 arma_ignore(expr);
1111 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
1112 return false;
1113 }
1114 #endif
1115 }
1116
1117
1118
1119 //! eigen decomposition of general square matrix (complex, balance given matrix)
1120 template<typename T1>
1121 inline
1122 bool
eig_gen_balance(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & vecs,const bool vecs_on,const Base<std::complex<typename T1::pod_type>,T1> & expr)1123 auxlib::eig_gen_balance
1124 (
1125 Mat< std::complex<typename T1::pod_type> >& vals,
1126 Mat< std::complex<typename T1::pod_type> >& vecs,
1127 const bool vecs_on,
1128 const Base< std::complex<typename T1::pod_type>, T1 >& expr
1129 )
1130 {
1131 arma_extra_debug_sigprint();
1132
1133 #if defined(ARMA_CRIPPLED_LAPACK)
1134 {
1135 arma_extra_debug_print("auxlib::eig_gen_balance(): redirecting to auxlib::eig_gen() due to crippled LAPACK");
1136
1137 return auxlib::eig_gen(vals, vecs, vecs_on, expr);
1138 }
1139 #elif defined(ARMA_USE_LAPACK)
1140 {
1141 typedef typename T1::pod_type T;
1142 typedef typename std::complex<T> eT;
1143
1144 Mat<eT> X = expr.get_ref();
1145
1146 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
1147
1148 arma_debug_assert_blas_size(X);
1149
1150 if(X.is_empty())
1151 {
1152 vals.reset();
1153 vecs.reset();
1154 return true;
1155 }
1156
1157 if(X.is_finite() == false) { return false; }
1158
1159 vals.set_size(X.n_rows, 1);
1160
1161 if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); }
1162
1163 podarray<eT> junk(1);
1164
1165 char bal = 'B';
1166 char jobvl = 'N';
1167 char jobvr = (vecs_on) ? 'V' : 'N';
1168 char sense = 'N';
1169 blas_int N = blas_int(X.n_rows);
1170 eT* vl = junk.memptr();
1171 eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr();
1172 blas_int ldvl = blas_int(1);
1173 blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1);
1174 blas_int ilo = blas_int(0);
1175 blas_int ihi = blas_int(0);
1176 T abnrm = T(0);
1177 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(2*N))
1178 blas_int info = blas_int(0);
1179
1180 podarray<T> scale(X.n_rows);
1181 podarray<T> rconde(X.n_rows);
1182 podarray<T> rcondv(X.n_rows);
1183
1184 podarray<eT> work( static_cast<uword>(lwork) );
1185 podarray< T> rwork( static_cast<uword>(2*N) );
1186
1187 arma_extra_debug_print("lapack::cx_geevx() -- START");
1188 lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
1189 arma_extra_debug_print("lapack::cx_geevx() -- END");
1190
1191 return (info == 0);
1192 }
1193 #else
1194 {
1195 arma_ignore(vals);
1196 arma_ignore(vecs);
1197 arma_ignore(vecs_on);
1198 arma_ignore(expr);
1199 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
1200 return false;
1201 }
1202 #endif
1203 }
1204
1205
1206
1207 //! two-sided eigen decomposition of general square matrix (real)
1208 template<typename T1>
1209 inline
1210 bool
eig_gen_twosided(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & lvecs,Mat<std::complex<typename T1::pod_type>> & rvecs,const Base<typename T1::pod_type,T1> & expr)1211 auxlib::eig_gen_twosided
1212 (
1213 Mat< std::complex<typename T1::pod_type> >& vals,
1214 Mat< std::complex<typename T1::pod_type> >& lvecs,
1215 Mat< std::complex<typename T1::pod_type> >& rvecs,
1216 const Base<typename T1::pod_type,T1>& expr
1217 )
1218 {
1219 arma_extra_debug_sigprint();
1220
1221 #if defined(ARMA_USE_LAPACK)
1222 {
1223 typedef typename T1::pod_type T;
1224
1225 Mat<T> X = expr.get_ref();
1226
1227 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
1228
1229 arma_debug_assert_blas_size(X);
1230
1231 if(X.is_empty())
1232 {
1233 vals.reset();
1234 lvecs.reset();
1235 rvecs.reset();
1236 return true;
1237 }
1238
1239 if(X.is_finite() == false) { return false; }
1240
1241 vals.set_size(X.n_rows, 1);
1242
1243 lvecs.set_size(X.n_rows, X.n_rows);
1244 rvecs.set_size(X.n_rows, X.n_rows);
1245
1246 Mat<T> ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
1247 Mat<T> rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
1248
1249 char jobvl = 'V';
1250 char jobvr = 'V';
1251 blas_int N = blas_int(X.n_rows);
1252 blas_int ldvl = blas_int(ltmp.n_rows);
1253 blas_int ldvr = blas_int(rtmp.n_rows);
1254 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 4*N)
1255 blas_int info = 0;
1256
1257 podarray<T> work( static_cast<uword>(lwork) );
1258
1259 podarray<T> vals_real(X.n_rows);
1260 podarray<T> vals_imag(X.n_rows);
1261
1262 arma_extra_debug_print("lapack::geev() -- START");
1263 lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info);
1264 arma_extra_debug_print("lapack::geev() -- END");
1265
1266 if(info != 0) { return false; }
1267
1268 arma_extra_debug_print("reformatting eigenvalues and eigenvectors");
1269
1270 std::complex<T>* vals_mem = vals.memptr();
1271
1272 for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
1273
1274 for(uword j=0; j < X.n_rows; ++j)
1275 {
1276 if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
1277 {
1278 for(uword i=0; i < X.n_rows; ++i)
1279 {
1280 lvecs.at(i,j) = std::complex<T>( ltmp.at(i,j), ltmp.at(i,j+1) );
1281 lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) );
1282 rvecs.at(i,j) = std::complex<T>( rtmp.at(i,j), rtmp.at(i,j+1) );
1283 rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) );
1284 }
1285 ++j;
1286 }
1287 else
1288 {
1289 for(uword i=0; i<X.n_rows; ++i)
1290 {
1291 lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0));
1292 rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0));
1293 }
1294 }
1295 }
1296
1297 return true;
1298 }
1299 #else
1300 {
1301 arma_ignore(vals);
1302 arma_ignore(lvecs);
1303 arma_ignore(rvecs);
1304 arma_ignore(expr);
1305 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
1306 return false;
1307 }
1308 #endif
1309 }
1310
1311
1312
1313 //! two-sided eigen decomposition of general square matrix (complex)
1314 template<typename T1>
1315 inline
1316 bool
eig_gen_twosided(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & lvecs,Mat<std::complex<typename T1::pod_type>> & rvecs,const Base<std::complex<typename T1::pod_type>,T1> & expr)1317 auxlib::eig_gen_twosided
1318 (
1319 Mat< std::complex<typename T1::pod_type> >& vals,
1320 Mat< std::complex<typename T1::pod_type> >& lvecs,
1321 Mat< std::complex<typename T1::pod_type> >& rvecs,
1322 const Base< std::complex<typename T1::pod_type>, T1 >& expr
1323 )
1324 {
1325 arma_extra_debug_sigprint();
1326
1327 #if defined(ARMA_USE_LAPACK)
1328 {
1329 typedef typename T1::pod_type T;
1330 typedef typename std::complex<T> eT;
1331
1332 Mat<eT> X = expr.get_ref();
1333
1334 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
1335
1336 arma_debug_assert_blas_size(X);
1337
1338 if(X.is_empty())
1339 {
1340 vals.reset();
1341 lvecs.reset();
1342 rvecs.reset();
1343 return true;
1344 }
1345
1346 if(X.is_finite() == false) { return false; }
1347
1348 vals.set_size(X.n_rows, 1);
1349
1350 lvecs.set_size(X.n_rows, X.n_rows);
1351 rvecs.set_size(X.n_rows, X.n_rows);
1352
1353 char jobvl = 'V';
1354 char jobvr = 'V';
1355 blas_int N = blas_int(X.n_rows);
1356 blas_int ldvl = blas_int(lvecs.n_rows);
1357 blas_int ldvr = blas_int(rvecs.n_rows);
1358 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 2*N)
1359 blas_int info = 0;
1360
1361 podarray<eT> work( static_cast<uword>(lwork) );
1362 podarray< T> rwork( static_cast<uword>(2*N) );
1363
1364 arma_extra_debug_print("lapack::cx_geev() -- START");
1365 lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
1366 arma_extra_debug_print("lapack::cx_geev() -- END");
1367
1368 return (info == 0);
1369 }
1370 #else
1371 {
1372 arma_ignore(vals);
1373 arma_ignore(lvecs);
1374 arma_ignore(rvecs);
1375 arma_ignore(expr);
1376 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
1377 return false;
1378 }
1379 #endif
1380 }
1381
1382
1383
1384 //! two-sided eigen decomposition of general square matrix (real, balance given matrix)
1385 template<typename T1>
1386 inline
1387 bool
eig_gen_twosided_balance(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & lvecs,Mat<std::complex<typename T1::pod_type>> & rvecs,const Base<typename T1::pod_type,T1> & expr)1388 auxlib::eig_gen_twosided_balance
1389 (
1390 Mat< std::complex<typename T1::pod_type> >& vals,
1391 Mat< std::complex<typename T1::pod_type> >& lvecs,
1392 Mat< std::complex<typename T1::pod_type> >& rvecs,
1393 const Base<typename T1::pod_type,T1>& expr
1394 )
1395 {
1396 arma_extra_debug_sigprint();
1397
1398 #if defined(ARMA_USE_LAPACK)
1399 {
1400 typedef typename T1::pod_type T;
1401
1402 Mat<T> X = expr.get_ref();
1403
1404 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
1405
1406 arma_debug_assert_blas_size(X);
1407
1408 if(X.is_empty())
1409 {
1410 vals.reset();
1411 lvecs.reset();
1412 rvecs.reset();
1413 return true;
1414 }
1415
1416 if(X.is_finite() == false) { return false; }
1417
1418 vals.set_size(X.n_rows, 1);
1419
1420 lvecs.set_size(X.n_rows, X.n_rows);
1421 rvecs.set_size(X.n_rows, X.n_rows);
1422
1423 Mat<T> ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
1424 Mat<T> rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator());
1425
1426 char bal = 'B';
1427 char jobvl = 'V';
1428 char jobvr = 'V';
1429 char sense = 'N';
1430 blas_int N = blas_int(X.n_rows);
1431 blas_int ldvl = blas_int(ltmp.n_rows);
1432 blas_int ldvr = blas_int(rtmp.n_rows);
1433 blas_int ilo = blas_int(0);
1434 blas_int ihi = blas_int(0);
1435 T abnrm = T(0);
1436 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(3*N))
1437 blas_int info = blas_int(0);
1438
1439 podarray<T> scale(X.n_rows);
1440 podarray<T> rconde(X.n_rows);
1441 podarray<T> rcondv(X.n_rows);
1442
1443 podarray<T> work( static_cast<uword>(lwork) );
1444 podarray<blas_int> iwork( uword(1) ); // iwork not used by lapack::geevx() as sense = 'N'
1445
1446 podarray<T> vals_real(X.n_rows);
1447 podarray<T> vals_imag(X.n_rows);
1448
1449 arma_extra_debug_print("lapack::geevx() -- START");
1450 lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info);
1451 arma_extra_debug_print("lapack::geevx() -- END");
1452
1453 if(info != 0) { return false; }
1454
1455 arma_extra_debug_print("reformatting eigenvalues and eigenvectors");
1456
1457 std::complex<T>* vals_mem = vals.memptr();
1458
1459 for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); }
1460
1461 for(uword j=0; j < X.n_rows; ++j)
1462 {
1463 if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
1464 {
1465 for(uword i=0; i < X.n_rows; ++i)
1466 {
1467 lvecs.at(i,j) = std::complex<T>( ltmp.at(i,j), ltmp.at(i,j+1) );
1468 lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) );
1469 rvecs.at(i,j) = std::complex<T>( rtmp.at(i,j), rtmp.at(i,j+1) );
1470 rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) );
1471 }
1472 ++j;
1473 }
1474 else
1475 {
1476 for(uword i=0; i<X.n_rows; ++i)
1477 {
1478 lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0));
1479 rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0));
1480 }
1481 }
1482 }
1483
1484 return true;
1485 }
1486 #else
1487 {
1488 arma_ignore(vals);
1489 arma_ignore(lvecs);
1490 arma_ignore(rvecs);
1491 arma_ignore(expr);
1492 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
1493 return false;
1494 }
1495 #endif
1496 }
1497
1498
1499
1500 //! two-sided eigen decomposition of general square matrix (complex, balance given matrix)
1501 template<typename T1>
1502 inline
1503 bool
eig_gen_twosided_balance(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & lvecs,Mat<std::complex<typename T1::pod_type>> & rvecs,const Base<std::complex<typename T1::pod_type>,T1> & expr)1504 auxlib::eig_gen_twosided_balance
1505 (
1506 Mat< std::complex<typename T1::pod_type> >& vals,
1507 Mat< std::complex<typename T1::pod_type> >& lvecs,
1508 Mat< std::complex<typename T1::pod_type> >& rvecs,
1509 const Base< std::complex<typename T1::pod_type>, T1 >& expr
1510 )
1511 {
1512 arma_extra_debug_sigprint();
1513
1514 #if defined(ARMA_CRIPPLED_LAPACK)
1515 {
1516 arma_extra_debug_print("auxlib::eig_gen_twosided_balance(): redirecting to auxlib::eig_gen() due to crippled LAPACK");
1517
1518 return auxlib::eig_gen(vals, lvecs, rvecs, expr);
1519 }
1520 #elif defined(ARMA_USE_LAPACK)
1521 {
1522 typedef typename T1::pod_type T;
1523 typedef typename std::complex<T> eT;
1524
1525 Mat<eT> X = expr.get_ref();
1526
1527 arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" );
1528
1529 arma_debug_assert_blas_size(X);
1530
1531 if(X.is_empty())
1532 {
1533 vals.reset();
1534 lvecs.reset();
1535 rvecs.reset();
1536 return true;
1537 }
1538
1539 if(X.is_finite() == false) { return false; }
1540
1541 vals.set_size(X.n_rows, 1);
1542
1543 lvecs.set_size(X.n_rows, X.n_rows);
1544 rvecs.set_size(X.n_rows, X.n_rows);
1545
1546 char bal = 'B';
1547 char jobvl = 'V';
1548 char jobvr = 'V';
1549 char sense = 'N';
1550 blas_int N = blas_int(X.n_rows);
1551 blas_int ldvl = blas_int(lvecs.n_rows);
1552 blas_int ldvr = blas_int(rvecs.n_rows);
1553 blas_int ilo = blas_int(0);
1554 blas_int ihi = blas_int(0);
1555 T abnrm = T(0);
1556 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(2*N))
1557 blas_int info = blas_int(0);
1558
1559 podarray<T> scale(X.n_rows);
1560 podarray<T> rconde(X.n_rows);
1561 podarray<T> rcondv(X.n_rows);
1562
1563 podarray<eT> work( static_cast<uword>(lwork) );
1564 podarray< T> rwork( static_cast<uword>(2*N) );
1565
1566 arma_extra_debug_print("lapack::cx_geevx() -- START");
1567 lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
1568 arma_extra_debug_print("lapack::cx_geevx() -- END");
1569
1570 return (info == 0);
1571 }
1572 #else
1573 {
1574 arma_ignore(vals);
1575 arma_ignore(lvecs);
1576 arma_ignore(rvecs);
1577 arma_ignore(expr);
1578 arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled");
1579 return false;
1580 }
1581 #endif
1582 }
1583
1584
1585
1586 //! eigendecomposition of general square matrix pair (real)
1587 template<typename T1, typename T2>
1588 inline
1589 bool
eig_pair(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & vecs,const bool vecs_on,const Base<typename T1::pod_type,T1> & A_expr,const Base<typename T1::pod_type,T2> & B_expr)1590 auxlib::eig_pair
1591 (
1592 Mat< std::complex<typename T1::pod_type> >& vals,
1593 Mat< std::complex<typename T1::pod_type> >& vecs,
1594 const bool vecs_on,
1595 const Base<typename T1::pod_type,T1>& A_expr,
1596 const Base<typename T1::pod_type,T2>& B_expr
1597 )
1598 {
1599 arma_extra_debug_sigprint();
1600
1601 #if defined(ARMA_USE_LAPACK)
1602 {
1603 typedef typename T1::pod_type T;
1604 typedef std::complex<T> eT;
1605
1606 Mat<T> A(A_expr.get_ref());
1607 Mat<T> B(B_expr.get_ref());
1608
1609 arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
1610
1611 arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
1612
1613 arma_debug_assert_blas_size(A);
1614
1615 if(A.is_empty())
1616 {
1617 vals.reset();
1618 vecs.reset();
1619 return true;
1620 }
1621
1622 if(A.is_finite() == false) { return false; }
1623 if(B.is_finite() == false) { return false; }
1624
1625 vals.set_size(A.n_rows, 1);
1626
1627 Mat<T> tmp(1, 1, arma_nozeros_indicator());
1628
1629 if(vecs_on)
1630 {
1631 vecs.set_size(A.n_rows, A.n_rows);
1632 tmp.set_size(A.n_rows, A.n_rows);
1633 }
1634
1635 podarray<T> junk(1);
1636
1637 char jobvl = 'N';
1638 char jobvr = (vecs_on) ? 'V' : 'N';
1639 blas_int N = blas_int(A.n_rows);
1640 T* vl = junk.memptr();
1641 T* vr = (vecs_on) ? tmp.memptr() : junk.memptr();
1642 blas_int ldvl = blas_int(1);
1643 blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1);
1644 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 8*N)
1645 blas_int info = 0;
1646
1647 podarray<T> alphar(A.n_rows);
1648 podarray<T> alphai(A.n_rows);
1649 podarray<T> beta(A.n_rows);
1650
1651 podarray<T> work( static_cast<uword>(lwork) );
1652
1653 arma_extra_debug_print("lapack::ggev()");
1654 lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info);
1655
1656 if(info != 0) { return false; }
1657
1658 arma_extra_debug_print("reformatting eigenvalues and eigenvectors");
1659
1660 eT* vals_mem = vals.memptr();
1661 const T* alphar_mem = alphar.memptr();
1662 const T* alphai_mem = alphai.memptr();
1663 const T* beta_mem = beta.memptr();
1664
1665 bool beta_has_zero = false;
1666
1667 for(uword j=0; j<A.n_rows; ++j)
1668 {
1669 const T alphai_val = alphai_mem[j];
1670 const T beta_val = beta_mem[j];
1671
1672 const T re = alphar_mem[j] / beta_val;
1673 const T im = alphai_val / beta_val;
1674
1675 beta_has_zero = (beta_has_zero || (beta_val == T(0)));
1676
1677 vals_mem[j] = std::complex<T>(re, im);
1678
1679 if( (alphai_val > T(0)) && (j < (A.n_rows-1)) )
1680 {
1681 ++j;
1682 vals_mem[j] = std::complex<T>(re,-im); // force exact conjugate
1683 }
1684 }
1685
1686 if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); }
1687
1688 if(vecs_on)
1689 {
1690 for(uword j=0; j<A.n_rows; ++j)
1691 {
1692 if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
1693 {
1694 for(uword i=0; i<A.n_rows; ++i)
1695 {
1696 vecs.at(i,j) = std::complex<T>( tmp.at(i,j), tmp.at(i,j+1) );
1697 vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) );
1698 }
1699
1700 ++j;
1701 }
1702 else
1703 {
1704 for(uword i=0; i<A.n_rows; ++i)
1705 {
1706 vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0));
1707 }
1708 }
1709 }
1710 }
1711
1712 return true;
1713 }
1714 #else
1715 {
1716 arma_ignore(vals);
1717 arma_ignore(vecs);
1718 arma_ignore(vecs_on);
1719 arma_ignore(A_expr);
1720 arma_ignore(B_expr);
1721 arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
1722 return false;
1723 }
1724 #endif
1725 }
1726
1727
1728
1729 //! eigendecomposition of general square matrix pair (complex)
1730 template<typename T1, typename T2>
1731 inline
1732 bool
eig_pair(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & vecs,const bool vecs_on,const Base<std::complex<typename T1::pod_type>,T1> & A_expr,const Base<std::complex<typename T1::pod_type>,T2> & B_expr)1733 auxlib::eig_pair
1734 (
1735 Mat< std::complex<typename T1::pod_type> >& vals,
1736 Mat< std::complex<typename T1::pod_type> >& vecs,
1737 const bool vecs_on,
1738 const Base< std::complex<typename T1::pod_type>, T1 >& A_expr,
1739 const Base< std::complex<typename T1::pod_type>, T2 >& B_expr
1740 )
1741 {
1742 arma_extra_debug_sigprint();
1743
1744 #if defined(ARMA_USE_LAPACK)
1745 {
1746 typedef typename T1::pod_type T;
1747 typedef typename std::complex<T> eT;
1748
1749 Mat<eT> A(A_expr.get_ref());
1750 Mat<eT> B(B_expr.get_ref());
1751
1752 arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
1753
1754 arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
1755
1756 arma_debug_assert_blas_size(A);
1757
1758 if(A.is_empty())
1759 {
1760 vals.reset();
1761 vecs.reset();
1762 return true;
1763 }
1764
1765 if(A.is_finite() == false) { return false; }
1766 if(B.is_finite() == false) { return false; }
1767
1768 vals.set_size(A.n_rows, 1);
1769
1770 if(vecs_on) { vecs.set_size(A.n_rows, A.n_rows); }
1771
1772 podarray<eT> junk(1);
1773
1774 char jobvl = 'N';
1775 char jobvr = (vecs_on) ? 'V' : 'N';
1776 blas_int N = blas_int(A.n_rows);
1777 eT* vl = junk.memptr();
1778 eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr();
1779 blas_int ldvl = blas_int(1);
1780 blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1);
1781 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N)
1782 blas_int info = 0;
1783
1784 podarray<eT> alpha(A.n_rows);
1785 podarray<eT> beta(A.n_rows);
1786
1787 podarray<eT> work( static_cast<uword>(lwork) );
1788 podarray<T> rwork( static_cast<uword>(8*N) );
1789
1790 arma_extra_debug_print("lapack::cx_ggev()");
1791 lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
1792
1793 if(info != 0) { return false; }
1794
1795 eT* vals_mem = vals.memptr();
1796 const eT* alpha_mem = alpha.memptr();
1797 const eT* beta_mem = beta.memptr();
1798
1799 const std::complex<T> zero(T(0), T(0));
1800
1801 bool beta_has_zero = false;
1802
1803 for(uword i=0; i<A.n_rows; ++i)
1804 {
1805 const eT& beta_val = beta_mem[i];
1806
1807 vals_mem[i] = alpha_mem[i] / beta_val;
1808
1809 beta_has_zero = (beta_has_zero || (beta_val == zero));
1810 }
1811
1812 if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); }
1813
1814 return true;
1815 }
1816 #else
1817 {
1818 arma_ignore(vals);
1819 arma_ignore(vecs);
1820 arma_ignore(vecs_on);
1821 arma_ignore(A_expr);
1822 arma_ignore(B_expr);
1823 arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
1824 return false;
1825 }
1826 #endif
1827 }
1828
1829
1830
1831 //! two-sided eigendecomposition of general square matrix pair (real)
1832 template<typename T1, typename T2>
1833 inline
1834 bool
eig_pair_twosided(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & lvecs,Mat<std::complex<typename T1::pod_type>> & rvecs,const Base<typename T1::pod_type,T1> & A_expr,const Base<typename T1::pod_type,T2> & B_expr)1835 auxlib::eig_pair_twosided
1836 (
1837 Mat< std::complex<typename T1::pod_type> >& vals,
1838 Mat< std::complex<typename T1::pod_type> >& lvecs,
1839 Mat< std::complex<typename T1::pod_type> >& rvecs,
1840 const Base<typename T1::pod_type,T1>& A_expr,
1841 const Base<typename T1::pod_type,T2>& B_expr
1842 )
1843 {
1844 arma_extra_debug_sigprint();
1845
1846 #if defined(ARMA_USE_LAPACK)
1847 {
1848 typedef typename T1::pod_type T;
1849 typedef std::complex<T> eT;
1850
1851 Mat<T> A(A_expr.get_ref());
1852 Mat<T> B(B_expr.get_ref());
1853
1854 arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
1855
1856 arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
1857
1858 arma_debug_assert_blas_size(A);
1859
1860 if(A.is_empty())
1861 {
1862 vals.reset();
1863 lvecs.reset();
1864 rvecs.reset();
1865 return true;
1866 }
1867
1868 if(A.is_finite() == false) { return false; }
1869 if(B.is_finite() == false) { return false; }
1870
1871 vals.set_size(A.n_rows, 1);
1872
1873 lvecs.set_size(A.n_rows, A.n_rows);
1874 rvecs.set_size(A.n_rows, A.n_rows);
1875
1876 Mat<T> ltmp(A.n_rows, A.n_rows, arma_nozeros_indicator());
1877 Mat<T> rtmp(A.n_rows, A.n_rows, arma_nozeros_indicator());
1878
1879 char jobvl = 'V';
1880 char jobvr = 'V';
1881 blas_int N = blas_int(A.n_rows);
1882 blas_int ldvl = blas_int(ltmp.n_rows);
1883 blas_int ldvr = blas_int(rtmp.n_rows);
1884 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 8*N)
1885 blas_int info = 0;
1886
1887 podarray<T> alphar(A.n_rows);
1888 podarray<T> alphai(A.n_rows);
1889 podarray<T> beta(A.n_rows);
1890
1891 podarray<T> work( static_cast<uword>(lwork) );
1892
1893 arma_extra_debug_print("lapack::ggev()");
1894 lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info);
1895
1896 if(info != 0) { return false; }
1897
1898 arma_extra_debug_print("reformatting eigenvalues and eigenvectors");
1899
1900 eT* vals_mem = vals.memptr();
1901 const T* alphar_mem = alphar.memptr();
1902 const T* alphai_mem = alphai.memptr();
1903 const T* beta_mem = beta.memptr();
1904
1905 bool beta_has_zero = false;
1906
1907 for(uword j=0; j<A.n_rows; ++j)
1908 {
1909 const T alphai_val = alphai_mem[j];
1910 const T beta_val = beta_mem[j];
1911
1912 const T re = alphar_mem[j] / beta_val;
1913 const T im = alphai_val / beta_val;
1914
1915 beta_has_zero = (beta_has_zero || (beta_val == T(0)));
1916
1917 vals_mem[j] = std::complex<T>(re, im);
1918
1919 if( (alphai_val > T(0)) && (j < (A.n_rows-1)) )
1920 {
1921 ++j;
1922 vals_mem[j] = std::complex<T>(re,-im); // force exact conjugate
1923 }
1924 }
1925
1926 if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); }
1927
1928 for(uword j=0; j < A.n_rows; ++j)
1929 {
1930 if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) )
1931 {
1932 for(uword i=0; i < A.n_rows; ++i)
1933 {
1934 lvecs.at(i,j) = std::complex<T>( ltmp.at(i,j), ltmp.at(i,j+1) );
1935 lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) );
1936 rvecs.at(i,j) = std::complex<T>( rtmp.at(i,j), rtmp.at(i,j+1) );
1937 rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) );
1938 }
1939 ++j;
1940 }
1941 else
1942 {
1943 for(uword i=0; i<A.n_rows; ++i)
1944 {
1945 lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0));
1946 rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0));
1947 }
1948 }
1949 }
1950
1951 return true;
1952 }
1953 #else
1954 {
1955 arma_ignore(vals);
1956 arma_ignore(lvecs);
1957 arma_ignore(rvecs);
1958 arma_ignore(A_expr);
1959 arma_ignore(B_expr);
1960 arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
1961 return false;
1962 }
1963 #endif
1964 }
1965
1966
1967
1968 //! two-sided eigendecomposition of general square matrix pair (complex)
1969 template<typename T1, typename T2>
1970 inline
1971 bool
eig_pair_twosided(Mat<std::complex<typename T1::pod_type>> & vals,Mat<std::complex<typename T1::pod_type>> & lvecs,Mat<std::complex<typename T1::pod_type>> & rvecs,const Base<std::complex<typename T1::pod_type>,T1> & A_expr,const Base<std::complex<typename T1::pod_type>,T2> & B_expr)1972 auxlib::eig_pair_twosided
1973 (
1974 Mat< std::complex<typename T1::pod_type> >& vals,
1975 Mat< std::complex<typename T1::pod_type> >& lvecs,
1976 Mat< std::complex<typename T1::pod_type> >& rvecs,
1977 const Base< std::complex<typename T1::pod_type>, T1 >& A_expr,
1978 const Base< std::complex<typename T1::pod_type>, T2 >& B_expr
1979 )
1980 {
1981 arma_extra_debug_sigprint();
1982
1983 #if defined(ARMA_USE_LAPACK)
1984 {
1985 typedef typename T1::pod_type T;
1986 typedef typename std::complex<T> eT;
1987
1988 Mat<eT> A(A_expr.get_ref());
1989 Mat<eT> B(B_expr.get_ref());
1990
1991 arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" );
1992
1993 arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" );
1994
1995 arma_debug_assert_blas_size(A);
1996
1997 if(A.is_empty())
1998 {
1999 vals.reset();
2000 lvecs.reset();
2001 rvecs.reset();
2002 return true;
2003 }
2004
2005 if(A.is_finite() == false) { return false; }
2006 if(B.is_finite() == false) { return false; }
2007
2008 vals.set_size(A.n_rows, 1);
2009
2010 lvecs.set_size(A.n_rows, A.n_rows);
2011 rvecs.set_size(A.n_rows, A.n_rows);
2012
2013 char jobvl = 'V';
2014 char jobvr = 'V';
2015 blas_int N = blas_int(A.n_rows);
2016 blas_int ldvl = blas_int(lvecs.n_rows);
2017 blas_int ldvr = blas_int(rvecs.n_rows);
2018 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N)
2019 blas_int info = 0;
2020
2021 podarray<eT> alpha(A.n_rows);
2022 podarray<eT> beta(A.n_rows);
2023
2024 podarray<eT> work( static_cast<uword>(lwork) );
2025 podarray<T> rwork( static_cast<uword>(8*N) );
2026
2027 arma_extra_debug_print("lapack::cx_ggev()");
2028 lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info);
2029
2030 if(info != 0) { return false; }
2031
2032 eT* vals_mem = vals.memptr();
2033 const eT* alpha_mem = alpha.memptr();
2034 const eT* beta_mem = beta.memptr();
2035
2036 const std::complex<T> zero(T(0), T(0));
2037
2038 bool beta_has_zero = false;
2039
2040 for(uword i=0; i<A.n_rows; ++i)
2041 {
2042 const eT& beta_val = beta_mem[i];
2043
2044 vals_mem[i] = alpha_mem[i] / beta_val;
2045
2046 beta_has_zero = (beta_has_zero || (beta_val == zero));
2047 }
2048
2049 if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); }
2050
2051 return true;
2052 }
2053 #else
2054 {
2055 arma_ignore(vals);
2056 arma_ignore(lvecs);
2057 arma_ignore(rvecs);
2058 arma_ignore(A_expr);
2059 arma_ignore(B_expr);
2060 arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled");
2061 return false;
2062 }
2063 #endif
2064 }
2065
2066
2067
2068 //! eigenvalues of a symmetric real matrix
2069 template<typename eT, typename T1>
2070 inline
2071 bool
eig_sym(Col<eT> & eigval,const Base<eT,T1> & X)2072 auxlib::eig_sym(Col<eT>& eigval, const Base<eT,T1>& X)
2073 {
2074 arma_extra_debug_sigprint();
2075
2076 #if defined(ARMA_USE_LAPACK)
2077 {
2078 Mat<eT> A(X.get_ref());
2079
2080 arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" );
2081
2082 if(A.is_empty())
2083 {
2084 eigval.reset();
2085 return true;
2086 }
2087
2088 // if(auxlib::rudimentary_sym_check(A) == false)
2089 // {
2090 // arma_debug_warn_level(1, "eig_sym(): given matrix is not symmetric");
2091 // return false;
2092 // }
2093
2094 if((arma_config::debug) && (auxlib::rudimentary_sym_check(A) == false))
2095 {
2096 arma_debug_warn_level(1, "eig_sym(): given matrix is not symmetric");
2097 }
2098
2099 arma_debug_assert_blas_size(A);
2100
2101 eigval.set_size(A.n_rows);
2102
2103 char jobz = 'N';
2104 char uplo = 'U';
2105
2106 blas_int N = blas_int(A.n_rows);
2107 blas_int lwork = (64+2)*N; // lwork_min = (std::max)(blas_int(1), 3*N-1)
2108 blas_int info = 0;
2109
2110 podarray<eT> work( static_cast<uword>(lwork) );
2111
2112 arma_extra_debug_print("lapack::syev()");
2113 lapack::syev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info);
2114
2115 return (info == 0);
2116 }
2117 #else
2118 {
2119 arma_ignore(eigval);
2120 arma_ignore(X);
2121 arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
2122 return false;
2123 }
2124 #endif
2125 }
2126
2127
2128
2129 //! eigenvalues of a hermitian complex matrix
2130 template<typename T, typename T1>
2131 inline
2132 bool
eig_sym(Col<T> & eigval,const Base<std::complex<T>,T1> & X)2133 auxlib::eig_sym(Col<T>& eigval, const Base<std::complex<T>,T1>& X)
2134 {
2135 arma_extra_debug_sigprint();
2136
2137 #if defined(ARMA_USE_LAPACK)
2138 {
2139 typedef typename std::complex<T> eT;
2140
2141 Mat<eT> A(X.get_ref());
2142
2143 arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" );
2144
2145 if(A.is_empty())
2146 {
2147 eigval.reset();
2148 return true;
2149 }
2150
2151 // if(auxlib::rudimentary_sym_check(A) == false)
2152 // {
2153 // arma_debug_warn_level(1, "eig_sym(): given matrix is not hermitian");
2154 // return false;
2155 // }
2156
2157 if((arma_config::debug) && (auxlib::rudimentary_sym_check(A) == false))
2158 {
2159 arma_debug_warn_level(1, "eig_sym(): given matrix is not hermitian");
2160 }
2161
2162 arma_debug_assert_blas_size(A);
2163
2164 eigval.set_size(A.n_rows);
2165
2166 char jobz = 'N';
2167 char uplo = 'U';
2168
2169 blas_int N = blas_int(A.n_rows);
2170 blas_int lwork = (64+1)*N; // lwork_min = (std::max)(blas_int(1), 2*N-1)
2171 blas_int info = 0;
2172
2173 podarray<eT> work( static_cast<uword>(lwork) );
2174 podarray<T> rwork( static_cast<uword>( (std::max)(blas_int(1), 3*N) ) );
2175
2176 arma_extra_debug_print("lapack::heev()");
2177 lapack::heev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
2178
2179 return (info == 0);
2180 }
2181 #else
2182 {
2183 arma_ignore(eigval);
2184 arma_ignore(X);
2185 arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
2186 return false;
2187 }
2188 #endif
2189 }
2190
2191
2192
2193 //! eigenvalues and eigenvectors of a symmetric real matrix
2194 template<typename eT>
2195 inline
2196 bool
eig_sym(Col<eT> & eigval,Mat<eT> & eigvec,const Mat<eT> & X)2197 auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& X)
2198 {
2199 arma_extra_debug_sigprint();
2200
2201 #if defined(ARMA_USE_LAPACK)
2202 {
2203 eigvec = X;
2204
2205 arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix must be square sized" );
2206
2207 if(eigvec.is_empty())
2208 {
2209 eigval.reset();
2210 eigvec.reset();
2211 return true;
2212 }
2213
2214 arma_debug_assert_blas_size(eigvec);
2215
2216 eigval.set_size(eigvec.n_rows);
2217
2218 char jobz = 'V';
2219 char uplo = 'U';
2220
2221 blas_int N = blas_int(eigvec.n_rows);
2222 blas_int lwork = (64+2)*N; // lwork_min = (std::max)(blas_int(1), 3*N-1)
2223 blas_int info = 0;
2224
2225 podarray<eT> work( static_cast<uword>(lwork) );
2226
2227 arma_extra_debug_print("lapack::syev()");
2228 lapack::syev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info);
2229
2230 return (info == 0);
2231 }
2232 #else
2233 {
2234 arma_ignore(eigval);
2235 arma_ignore(eigvec);
2236 arma_ignore(X);
2237 arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
2238 return false;
2239 }
2240 #endif
2241 }
2242
2243
2244
2245 //! eigenvalues and eigenvectors of a hermitian complex matrix
2246 template<typename T>
2247 inline
2248 bool
eig_sym(Col<T> & eigval,Mat<std::complex<T>> & eigvec,const Mat<std::complex<T>> & X)2249 auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& X)
2250 {
2251 arma_extra_debug_sigprint();
2252
2253 #if defined(ARMA_USE_LAPACK)
2254 {
2255 typedef typename std::complex<T> eT;
2256
2257 eigvec = X;
2258
2259 arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix must be square sized" );
2260
2261 if(eigvec.is_empty())
2262 {
2263 eigval.reset();
2264 eigvec.reset();
2265 return true;
2266 }
2267
2268 arma_debug_assert_blas_size(eigvec);
2269
2270 eigval.set_size(eigvec.n_rows);
2271
2272 char jobz = 'V';
2273 char uplo = 'U';
2274
2275 blas_int N = blas_int(eigvec.n_rows);
2276 blas_int lwork = (64+1)*N; // lwork_min = (std::max)(blas_int(1), 2*N-1)
2277 blas_int info = 0;
2278
2279 podarray<eT> work( static_cast<uword>(lwork) );
2280 podarray<T> rwork( static_cast<uword>((std::max)(blas_int(1), 3*N)) );
2281
2282 arma_extra_debug_print("lapack::heev()");
2283 lapack::heev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
2284
2285 return (info == 0);
2286 }
2287 #else
2288 {
2289 arma_ignore(eigval);
2290 arma_ignore(eigvec);
2291 arma_ignore(X);
2292 arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
2293 return false;
2294 }
2295 #endif
2296 }
2297
2298
2299
2300 //! eigenvalues and eigenvectors of a symmetric real matrix (divide and conquer algorithm)
2301 template<typename eT>
2302 inline
2303 bool
eig_sym_dc(Col<eT> & eigval,Mat<eT> & eigvec,const Mat<eT> & X)2304 auxlib::eig_sym_dc(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& X)
2305 {
2306 arma_extra_debug_sigprint();
2307
2308 #if defined(ARMA_USE_LAPACK)
2309 {
2310 eigvec = X;
2311
2312 arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix must be square sized" );
2313
2314 if(eigvec.is_empty())
2315 {
2316 eigval.reset();
2317 eigvec.reset();
2318 return true;
2319 }
2320
2321 arma_debug_assert_blas_size(eigvec);
2322
2323 eigval.set_size(eigvec.n_rows);
2324
2325 char jobz = 'V';
2326 char uplo = 'U';
2327
2328 blas_int N = blas_int(eigvec.n_rows);
2329 blas_int lwork_min = 1 + 6*N + 2*(N*N);
2330 blas_int liwork_min = 3 + 5*N;
2331 blas_int info = 0;
2332
2333 blas_int lwork_proposed = 0;
2334 blas_int liwork_proposed = 0;
2335
2336 if(N >= 32)
2337 {
2338 eT work_query[2];
2339 blas_int iwork_query[2];
2340
2341 blas_int lwork_query = -1;
2342 blas_int liwork_query = -1;
2343
2344 arma_extra_debug_print("lapack::syevd()");
2345 lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &iwork_query[0], &liwork_query, &info);
2346
2347 if(info != 0) { return false; }
2348
2349 lwork_proposed = static_cast<blas_int>( work_query[0] );
2350 liwork_proposed = iwork_query[0];
2351 }
2352
2353 blas_int lwork_final = (std::max)( lwork_proposed, lwork_min);
2354 blas_int liwork_final = (std::max)(liwork_proposed, liwork_min);
2355
2356 podarray<eT> work( static_cast<uword>( lwork_final) );
2357 podarray<blas_int> iwork( static_cast<uword>(liwork_final) );
2358
2359 arma_extra_debug_print("lapack::syevd()");
2360 lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, iwork.memptr(), &liwork_final, &info);
2361
2362 return (info == 0);
2363 }
2364 #else
2365 {
2366 arma_ignore(eigval);
2367 arma_ignore(eigvec);
2368 arma_ignore(X);
2369 arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
2370 return false;
2371 }
2372 #endif
2373 }
2374
2375
2376
2377 //! eigenvalues and eigenvectors of a hermitian complex matrix (divide and conquer algorithm)
2378 template<typename T>
2379 inline
2380 bool
eig_sym_dc(Col<T> & eigval,Mat<std::complex<T>> & eigvec,const Mat<std::complex<T>> & X)2381 auxlib::eig_sym_dc(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& X)
2382 {
2383 arma_extra_debug_sigprint();
2384
2385 #if defined(ARMA_USE_LAPACK)
2386 {
2387 typedef typename std::complex<T> eT;
2388
2389 eigvec = X;
2390
2391 arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix must be square sized" );
2392
2393 if(eigvec.is_empty())
2394 {
2395 eigval.reset();
2396 eigvec.reset();
2397 return true;
2398 }
2399
2400 arma_debug_assert_blas_size(eigvec);
2401
2402 eigval.set_size(eigvec.n_rows);
2403
2404 char jobz = 'V';
2405 char uplo = 'U';
2406
2407 blas_int N = blas_int(eigvec.n_rows);
2408 blas_int lwork_min = 2*N + N*N;
2409 blas_int lrwork_min = 1 + 5*N + 2*(N*N);
2410 blas_int liwork_min = 3 + 5*N;
2411 blas_int info = 0;
2412
2413 blas_int lwork_proposed = 0;
2414 blas_int lrwork_proposed = 0;
2415 blas_int liwork_proposed = 0;
2416
2417 if(N >= 32)
2418 {
2419 eT work_query[2];
2420 T rwork_query[2];
2421 blas_int iwork_query[2];
2422
2423 blas_int lwork_query = -1;
2424 blas_int lrwork_query = -1;
2425 blas_int liwork_query = -1;
2426
2427 arma_extra_debug_print("lapack::heevd()");
2428 lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &rwork_query[0], &lrwork_query, &iwork_query[0], &liwork_query, &info);
2429
2430 if(info != 0) { return false; }
2431
2432 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
2433 lrwork_proposed = static_cast<blas_int>( rwork_query[0] );
2434 liwork_proposed = iwork_query[0];
2435 }
2436
2437 blas_int lwork_final = (std::max)( lwork_proposed, lwork_min);
2438 blas_int lrwork_final = (std::max)(lrwork_proposed, lrwork_min);
2439 blas_int liwork_final = (std::max)(liwork_proposed, liwork_min);
2440
2441 podarray<eT> work( static_cast<uword>( lwork_final) );
2442 podarray< T> rwork( static_cast<uword>(lrwork_final) );
2443 podarray<blas_int> iwork( static_cast<uword>(liwork_final) );
2444
2445 arma_extra_debug_print("lapack::heevd()");
2446 lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &lrwork_final, iwork.memptr(), &liwork_final, &info);
2447
2448 return (info == 0);
2449 }
2450 #else
2451 {
2452 arma_ignore(eigval);
2453 arma_ignore(eigvec);
2454 arma_ignore(X);
2455 arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled");
2456 return false;
2457 }
2458 #endif
2459 }
2460
2461
2462
2463 template<typename eT>
2464 inline
2465 bool
chol_simple(Mat<eT> & X)2466 auxlib::chol_simple(Mat<eT>& X)
2467 {
2468 arma_extra_debug_sigprint();
2469
2470 #if defined(ARMA_USE_ATLAS)
2471 {
2472 arma_debug_assert_atlas_size(X);
2473
2474 int info = 0;
2475
2476 arma_extra_debug_print("atlas::clapack_potrf()");
2477 info = atlas::clapack_potrf(atlas::CblasColMajor, atlas::CblasUpper, X.n_rows, X.memptr(), X.n_rows);
2478
2479 return (info == 0);
2480 }
2481 #elif defined(ARMA_USE_LAPACK)
2482 {
2483 arma_debug_assert_blas_size(X);
2484
2485 char uplo = 'U';
2486 blas_int n = blas_int(X.n_rows);
2487 blas_int info = 0;
2488
2489 arma_extra_debug_print("lapack::potrf()");
2490 lapack::potrf(&uplo, &n, X.memptr(), &n, &info);
2491
2492 return (info == 0);
2493 }
2494 #else
2495 {
2496 arma_ignore(X);
2497
2498 arma_stop_logic_error("chol(): use of ATLAS or LAPACK must be enabled");
2499 return false;
2500 }
2501 #endif
2502 }
2503
2504
2505
2506 template<typename eT>
2507 inline
2508 bool
chol(Mat<eT> & X,const uword layout)2509 auxlib::chol(Mat<eT>& X, const uword layout)
2510 {
2511 arma_extra_debug_sigprint();
2512
2513 #if defined(ARMA_USE_ATLAS)
2514 {
2515 arma_debug_assert_atlas_size(X);
2516
2517 int info = 0;
2518
2519 arma_extra_debug_print("atlas::clapack_potrf()");
2520 info = atlas::clapack_potrf(atlas::CblasColMajor, ((layout == 0) ? atlas::CblasUpper : atlas::CblasLower), X.n_rows, X.memptr(), X.n_rows);
2521
2522 if(info != 0) { return false; }
2523
2524 X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type
2525
2526 return true;
2527 }
2528 #elif defined(ARMA_USE_LAPACK)
2529 {
2530 arma_debug_assert_blas_size(X);
2531
2532 char uplo = (layout == 0) ? 'U' : 'L';
2533 blas_int n = blas_int(X.n_rows);
2534 blas_int info = 0;
2535
2536 arma_extra_debug_print("lapack::potrf()");
2537 lapack::potrf(&uplo, &n, X.memptr(), &n, &info);
2538
2539 if(info != 0) { return false; }
2540
2541 X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type
2542
2543 return true;
2544 }
2545 #else
2546 {
2547 arma_ignore(X);
2548 arma_ignore(layout);
2549
2550 arma_stop_logic_error("chol(): use of ATLAS or LAPACK must be enabled");
2551 return false;
2552 }
2553 #endif
2554 }
2555
2556
2557
2558 template<typename eT>
2559 inline
2560 bool
chol_band(Mat<eT> & X,const uword KD,const uword layout)2561 auxlib::chol_band(Mat<eT>& X, const uword KD, const uword layout)
2562 {
2563 arma_extra_debug_sigprint();
2564
2565 return auxlib::chol_band_common(X, KD, layout);
2566 }
2567
2568
2569
2570 template<typename T>
2571 inline
2572 bool
chol_band(Mat<std::complex<T>> & X,const uword KD,const uword layout)2573 auxlib::chol_band(Mat< std::complex<T> >& X, const uword KD, const uword layout)
2574 {
2575 arma_extra_debug_sigprint();
2576
2577 #if defined(ARMA_CRIPPLED_LAPACK)
2578 {
2579 arma_extra_debug_print("auxlib::chol_band(): redirecting to auxlib::chol() due to crippled LAPACK");
2580
2581 arma_ignore(KD);
2582
2583 return auxlib::chol(X, layout);
2584 }
2585 #else
2586 {
2587 return auxlib::chol_band_common(X, KD, layout);
2588 }
2589 #endif
2590 }
2591
2592
2593
2594 template<typename eT>
2595 inline
2596 bool
chol_band_common(Mat<eT> & X,const uword KD,const uword layout)2597 auxlib::chol_band_common(Mat<eT>& X, const uword KD, const uword layout)
2598 {
2599 arma_extra_debug_sigprint();
2600
2601 #if defined(ARMA_USE_LAPACK)
2602 {
2603 const uword N = X.n_rows;
2604
2605 const uword KL = (layout == 0) ? uword(0) : KD;
2606 const uword KU = (layout == 0) ? KD : uword(0);
2607
2608 Mat<eT> AB;
2609 band_helper::compress(AB, X, KL, KU, false);
2610
2611 arma_debug_assert_blas_size(AB);
2612
2613 char uplo = (layout == 0) ? 'U' : 'L';
2614 blas_int n = blas_int(N);
2615 blas_int kd = blas_int(KD);
2616 blas_int ldab = blas_int(AB.n_rows);
2617 blas_int info = 0;
2618
2619 arma_extra_debug_print("lapack::pbtrf()");
2620 lapack::pbtrf(&uplo, &n, &kd, AB.memptr(), &ldab, &info);
2621
2622 if(info != 0) { return false; }
2623
2624 band_helper::uncompress(X, AB, KL, KU, false);
2625
2626 return true;
2627 }
2628 #else
2629 {
2630 arma_ignore(X);
2631 arma_ignore(KD);
2632 arma_ignore(layout);
2633
2634 arma_stop_logic_error("chol(): use of LAPACK must be enabled");
2635 return false;
2636 }
2637 #endif
2638 }
2639
2640
2641
2642 template<typename eT>
2643 inline
2644 bool
chol_pivot(Mat<eT> & X,Mat<uword> & P,const uword layout)2645 auxlib::chol_pivot(Mat<eT>& X, Mat<uword>& P, const uword layout)
2646 {
2647 arma_extra_debug_sigprint();
2648
2649 #if defined(ARMA_USE_LAPACK)
2650 {
2651 typedef typename get_pod_type<eT>::result T;
2652
2653 arma_debug_assert_blas_size(X);
2654
2655 char uplo = (layout == 0) ? 'U' : 'L';
2656 blas_int n = blas_int(X.n_rows);
2657 blas_int rank = 0;
2658 T tol = T(-1);
2659 blas_int info = 0;
2660
2661 podarray<blas_int> ipiv( X.n_rows);
2662 podarray<T> work(2*X.n_rows);
2663
2664 ipiv.zeros();
2665
2666 arma_extra_debug_print("lapack::pstrf()");
2667 lapack::pstrf(&uplo, &n, X.memptr(), &n, ipiv.memptr(), &rank, &tol, work.memptr(), &info);
2668
2669 if(info != 0) { return false; }
2670
2671 X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type
2672
2673 P.set_size(X.n_rows, 1);
2674
2675 for(uword i=0; i < X.n_rows; ++i)
2676 {
2677 P[i] = uword(ipiv[i] - 1); // take into account that Fortran counts from 1
2678 }
2679
2680 return true;
2681 }
2682 #else
2683 {
2684 arma_ignore(X);
2685 arma_ignore(P);
2686 arma_ignore(layout);
2687
2688 arma_stop_logic_error("chol(): use of LAPACK must be enabled");
2689 return false;
2690 }
2691 #endif
2692 }
2693
2694
2695
2696 //
2697 // hessenberg decomposition
2698 template<typename eT, typename T1>
2699 inline
2700 bool
hess(Mat<eT> & H,const Base<eT,T1> & X,Col<eT> & tao)2701 auxlib::hess(Mat<eT>& H, const Base<eT,T1>& X, Col<eT>& tao)
2702 {
2703 arma_extra_debug_sigprint();
2704
2705 #if defined(ARMA_USE_LAPACK)
2706 {
2707 H = X.get_ref();
2708
2709 arma_debug_check( (H.is_square() == false), "hess(): given matrix must be square sized" );
2710
2711 if(H.is_empty())
2712 {
2713 return true;
2714 }
2715
2716 arma_debug_assert_blas_size(H);
2717
2718 if(H.n_rows > 2)
2719 {
2720 tao.set_size(H.n_rows-1);
2721
2722 blas_int n = blas_int(H.n_rows);
2723 blas_int ilo = 1;
2724 blas_int ihi = blas_int(H.n_rows);
2725 blas_int lda = blas_int(H.n_rows);
2726 blas_int lwork = blas_int(H.n_rows) * 64;
2727 blas_int info = 0;
2728
2729 podarray<eT> work(static_cast<uword>(lwork));
2730
2731 arma_extra_debug_print("lapack::gehrd()");
2732 lapack::gehrd(&n, &ilo, &ihi, H.memptr(), &lda, tao.memptr(), work.memptr(), &lwork, &info);
2733
2734 return (info == 0);
2735 }
2736
2737 return true;
2738 }
2739 #else
2740 {
2741 arma_ignore(H);
2742 arma_ignore(X);
2743 arma_ignore(tao);
2744 arma_stop_logic_error("hess(): use of LAPACK must be enabled");
2745 return false;
2746 }
2747 #endif
2748 }
2749
2750
2751
2752 template<typename eT, typename T1>
2753 inline
2754 bool
qr(Mat<eT> & Q,Mat<eT> & R,const Base<eT,T1> & X)2755 auxlib::qr(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X)
2756 {
2757 arma_extra_debug_sigprint();
2758
2759 #if defined(ARMA_USE_LAPACK)
2760 {
2761 R = X.get_ref();
2762
2763 const uword R_n_rows = R.n_rows;
2764 const uword R_n_cols = R.n_cols;
2765
2766 if(R.is_empty())
2767 {
2768 Q.eye(R_n_rows, R_n_rows);
2769 return true;
2770 }
2771
2772 arma_debug_assert_blas_size(R);
2773
2774 blas_int m = static_cast<blas_int>(R_n_rows);
2775 blas_int n = static_cast<blas_int>(R_n_cols);
2776 blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr()
2777 blas_int k = (std::min)(m,n);
2778 blas_int info = 0;
2779
2780 podarray<eT> tau( static_cast<uword>(k) );
2781
2782 eT work_query[2];
2783 blas_int lwork_query = -1;
2784
2785 arma_extra_debug_print("lapack::geqrf()");
2786 lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info);
2787
2788 if(info != 0) { return false; }
2789
2790 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
2791 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
2792
2793 podarray<eT> work( static_cast<uword>(lwork_final) );
2794
2795 arma_extra_debug_print("lapack::geqrf()");
2796 lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
2797
2798 if(info != 0) { return false; }
2799
2800 Q.set_size(R_n_rows, R_n_rows);
2801
2802 arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) );
2803
2804 //
2805 // construct R
2806
2807 for(uword col=0; col < R_n_cols; ++col)
2808 {
2809 for(uword row=(col+1); row < R_n_rows; ++row)
2810 {
2811 R.at(row,col) = eT(0);
2812 }
2813 }
2814
2815
2816 if( (is_float<eT>::value) || (is_double<eT>::value) )
2817 {
2818 arma_extra_debug_print("lapack::orgqr()");
2819 lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
2820 }
2821 else
2822 if( (is_cx_float<eT>::value) || (is_cx_double<eT>::value) )
2823 {
2824 arma_extra_debug_print("lapack::ungqr()");
2825 lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
2826 }
2827
2828 return (info == 0);
2829 }
2830 #else
2831 {
2832 arma_ignore(Q);
2833 arma_ignore(R);
2834 arma_ignore(X);
2835 arma_stop_logic_error("qr(): use of LAPACK must be enabled");
2836 return false;
2837 }
2838 #endif
2839 }
2840
2841
2842
2843 template<typename eT, typename T1>
2844 inline
2845 bool
qr_econ(Mat<eT> & Q,Mat<eT> & R,const Base<eT,T1> & X)2846 auxlib::qr_econ(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X)
2847 {
2848 arma_extra_debug_sigprint();
2849
2850 #if defined(ARMA_USE_LAPACK)
2851 {
2852 if(is_Mat<T1>::value)
2853 {
2854 const unwrap<T1> tmp(X.get_ref());
2855 const Mat<eT>& M = tmp.M;
2856
2857 if(M.n_rows < M.n_cols)
2858 {
2859 return auxlib::qr(Q, R, X);
2860 }
2861 }
2862
2863 Q = X.get_ref();
2864
2865 const uword Q_n_rows = Q.n_rows;
2866 const uword Q_n_cols = Q.n_cols;
2867
2868 if( Q_n_rows <= Q_n_cols )
2869 {
2870 return auxlib::qr(Q, R, Q);
2871 }
2872
2873 if(Q.is_empty())
2874 {
2875 Q.set_size(Q_n_rows, 0 );
2876 R.set_size(0, Q_n_cols);
2877 return true;
2878 }
2879
2880 arma_debug_assert_blas_size(Q);
2881
2882 blas_int m = static_cast<blas_int>(Q_n_rows);
2883 blas_int n = static_cast<blas_int>(Q_n_cols);
2884 blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr()
2885 blas_int k = (std::min)(m,n);
2886 blas_int info = 0;
2887
2888 podarray<eT> tau( static_cast<uword>(k) );
2889
2890 eT work_query[2];
2891 blas_int lwork_query = -1;
2892
2893 arma_extra_debug_print("lapack::geqrf()");
2894 lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info);
2895
2896 if(info != 0) { return false; }
2897
2898 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
2899 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
2900
2901 podarray<eT> work( static_cast<uword>(lwork_final) );
2902
2903 arma_extra_debug_print("lapack::geqrf()");
2904 lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
2905
2906 if(info != 0) { return false; }
2907
2908 R.zeros(Q_n_cols, Q_n_cols);
2909
2910 //
2911 // construct R
2912
2913 for(uword col=0; col < Q_n_cols; ++col)
2914 {
2915 for(uword row=0; row <= col; ++row)
2916 {
2917 R.at(row,col) = Q.at(row,col);
2918 }
2919 }
2920
2921 if( (is_float<eT>::value) || (is_double<eT>::value) )
2922 {
2923 arma_extra_debug_print("lapack::orgqr()");
2924 lapack::orgqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
2925 }
2926 else
2927 if( (is_cx_float<eT>::value) || (is_cx_double<eT>::value) )
2928 {
2929 arma_extra_debug_print("lapack::ungqr()");
2930 lapack::ungqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
2931 }
2932
2933 return (info == 0);
2934 }
2935 #else
2936 {
2937 arma_ignore(Q);
2938 arma_ignore(R);
2939 arma_ignore(X);
2940 arma_stop_logic_error("qr_econ(): use of LAPACK must be enabled");
2941 return false;
2942 }
2943 #endif
2944 }
2945
2946
2947
2948 template<typename eT, typename T1>
2949 inline
2950 bool
qr_pivot(Mat<eT> & Q,Mat<eT> & R,Mat<uword> & P,const Base<eT,T1> & X)2951 auxlib::qr_pivot(Mat<eT>& Q, Mat<eT>& R, Mat<uword>& P, const Base<eT,T1>& X)
2952 {
2953 arma_extra_debug_sigprint();
2954
2955 #if defined(ARMA_USE_LAPACK)
2956 {
2957 R = X.get_ref();
2958
2959 const uword R_n_rows = R.n_rows;
2960 const uword R_n_cols = R.n_cols;
2961
2962 if(R.is_empty())
2963 {
2964 Q.eye(R_n_rows, R_n_rows);
2965
2966 P.set_size(R_n_cols, 1);
2967
2968 for(uword col=0; col < R_n_cols; ++col) { P.at(col) = col; }
2969
2970 return true;
2971 }
2972
2973 arma_debug_assert_blas_size(R);
2974
2975 blas_int m = static_cast<blas_int>(R_n_rows);
2976 blas_int n = static_cast<blas_int>(R_n_cols);
2977 blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n)); // take into account requirements of geqp3() and orgqr()
2978 blas_int k = (std::min)(m,n);
2979 blas_int info = 0;
2980
2981 podarray<eT> tau( static_cast<uword>(k) );
2982 podarray<blas_int> jpvt( R_n_cols );
2983
2984 jpvt.zeros();
2985
2986 eT work_query[2];
2987 blas_int lwork_query = -1;
2988
2989 arma_extra_debug_print("lapack::geqp3()");
2990 lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, &info);
2991
2992 if(info != 0) { return false; }
2993
2994 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
2995 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
2996
2997 podarray<eT> work( static_cast<uword>(lwork_final) );
2998
2999 arma_extra_debug_print("lapack::geqp3()");
3000 lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, &info);
3001
3002 if(info != 0) { return false; }
3003
3004 Q.set_size(R_n_rows, R_n_rows);
3005
3006 arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) );
3007
3008 //
3009 // construct R and P
3010
3011 P.set_size(R_n_cols, 1);
3012
3013 for(uword col=0; col < R_n_cols; ++col)
3014 {
3015 for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); }
3016
3017 P.at(col) = jpvt[col] - 1; // take into account that Fortran counts from 1
3018 }
3019
3020 arma_extra_debug_print("lapack::orgqr()");
3021 lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
3022
3023 return (info == 0);
3024 }
3025 #else
3026 {
3027 arma_ignore(Q);
3028 arma_ignore(R);
3029 arma_ignore(P);
3030 arma_ignore(X);
3031 arma_stop_logic_error("qr(): use of LAPACK must be enabled");
3032 return false;
3033 }
3034 #endif
3035 }
3036
3037
3038
3039 template<typename T, typename T1>
3040 inline
3041 bool
qr_pivot(Mat<std::complex<T>> & Q,Mat<std::complex<T>> & R,Mat<uword> & P,const Base<std::complex<T>,T1> & X)3042 auxlib::qr_pivot(Mat< std::complex<T> >& Q, Mat< std::complex<T> >& R, Mat<uword>& P, const Base<std::complex<T>,T1>& X)
3043 {
3044 arma_extra_debug_sigprint();
3045
3046 #if defined(ARMA_USE_LAPACK)
3047 {
3048 typedef typename std::complex<T> eT;
3049
3050 R = X.get_ref();
3051
3052 const uword R_n_rows = R.n_rows;
3053 const uword R_n_cols = R.n_cols;
3054
3055 if(R.is_empty())
3056 {
3057 Q.eye(R_n_rows, R_n_rows);
3058
3059 P.set_size(R_n_cols, 1);
3060
3061 for(uword col=0; col < R_n_cols; ++col) { P.at(col) = col; }
3062
3063 return true;
3064 }
3065
3066 arma_debug_assert_blas_size(R);
3067
3068 blas_int m = static_cast<blas_int>(R_n_rows);
3069 blas_int n = static_cast<blas_int>(R_n_cols);
3070 blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n)); // take into account requirements of geqp3() and ungqr()
3071 blas_int k = (std::min)(m,n);
3072 blas_int info = 0;
3073
3074 podarray<eT> tau( static_cast<uword>(k) );
3075 podarray< T> rwork( 2*R_n_cols );
3076 podarray<blas_int> jpvt( R_n_cols );
3077
3078 jpvt.zeros();
3079
3080 eT work_query[2];
3081 blas_int lwork_query = -1;
3082
3083 arma_extra_debug_print("lapack::geqp3()");
3084 lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, rwork.memptr(), &info);
3085
3086 if(info != 0) { return false; }
3087
3088 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
3089 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3090
3091 podarray<eT> work( static_cast<uword>(lwork_final) );
3092
3093 arma_extra_debug_print("lapack::geqp3()");
3094 lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &info);
3095
3096 if(info != 0) { return false; }
3097
3098 Q.set_size(R_n_rows, R_n_rows);
3099
3100 arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) );
3101
3102 //
3103 // construct R and P
3104
3105 P.set_size(R_n_cols, 1);
3106
3107 for(uword col=0; col < R_n_cols; ++col)
3108 {
3109 for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); }
3110
3111 P.at(col) = jpvt[col] - 1; // take into account that Fortran counts from 1
3112 }
3113
3114 arma_extra_debug_print("lapack::ungqr()");
3115 lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info);
3116
3117 return (info == 0);
3118 }
3119 #else
3120 {
3121 arma_ignore(Q);
3122 arma_ignore(R);
3123 arma_ignore(P);
3124 arma_ignore(X);
3125 arma_stop_logic_error("qr(): use of LAPACK must be enabled");
3126 return false;
3127 }
3128 #endif
3129 }
3130
3131
3132
3133 template<typename eT>
3134 inline
3135 bool
svd(Col<eT> & S,Mat<eT> & A)3136 auxlib::svd(Col<eT>& S, Mat<eT>& A)
3137 {
3138 arma_extra_debug_sigprint();
3139
3140 #if defined(ARMA_USE_LAPACK)
3141 {
3142 if(A.is_empty()) { S.reset(); return true; }
3143
3144 arma_debug_assert_blas_size(A);
3145
3146 Mat<eT> U(1, 1, arma_nozeros_indicator());
3147 Mat<eT> V(1, A.n_cols, arma_nozeros_indicator());
3148
3149 char jobu = 'N';
3150 char jobvt = 'N';
3151
3152 blas_int m = blas_int(A.n_rows);
3153 blas_int n = blas_int(A.n_cols);
3154 blas_int min_mn = (std::min)(m,n);
3155 blas_int lda = blas_int(A.n_rows);
3156 blas_int ldu = blas_int(U.n_rows);
3157 blas_int ldvt = blas_int(V.n_rows);
3158 blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
3159 blas_int info = 0;
3160
3161 S.set_size( static_cast<uword>(min_mn) );
3162
3163 blas_int lwork_proposed = 0;
3164
3165 if((m*n) >= 1024)
3166 {
3167 eT work_query[2];
3168 blas_int lwork_query = -1;
3169
3170 arma_extra_debug_print("lapack::gesvd()");
3171 lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info);
3172
3173 if(info != 0) { return false; }
3174
3175 lwork_proposed = static_cast<blas_int>( work_query[0] );
3176 }
3177
3178 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3179
3180 podarray<eT> work( static_cast<uword>(lwork_final) );
3181
3182 arma_extra_debug_print("lapack::gesvd()");
3183 lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info);
3184
3185 return (info == 0);
3186 }
3187 #else
3188 {
3189 arma_ignore(S);
3190 arma_ignore(A);
3191 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3192 return false;
3193 }
3194 #endif
3195 }
3196
3197
3198
3199 template<typename T>
3200 inline
3201 bool
svd(Col<T> & S,Mat<std::complex<T>> & A)3202 auxlib::svd(Col<T>& S, Mat< std::complex<T> >& A)
3203 {
3204 arma_extra_debug_sigprint();
3205
3206 #if defined(ARMA_USE_LAPACK)
3207 {
3208 typedef std::complex<T> eT;
3209
3210 if(A.is_empty()) { S.reset(); return true; }
3211
3212 arma_debug_assert_blas_size(A);
3213
3214 Mat<eT> U(1, 1, arma_nozeros_indicator());
3215 Mat<eT> V(1, A.n_cols, arma_nozeros_indicator());
3216
3217 char jobu = 'N';
3218 char jobvt = 'N';
3219
3220 blas_int m = blas_int(A.n_rows);
3221 blas_int n = blas_int(A.n_cols);
3222 blas_int min_mn = (std::min)(m,n);
3223 blas_int lda = blas_int(A.n_rows);
3224 blas_int ldu = blas_int(U.n_rows);
3225 blas_int ldvt = blas_int(V.n_rows);
3226 blas_int lwork_min = (std::max)( blas_int(1), 2*min_mn+(std::max)(m,n) );
3227 blas_int info = 0;
3228
3229 S.set_size( static_cast<uword>(min_mn) );
3230
3231 podarray<T> rwork( static_cast<uword>(5*min_mn) );
3232
3233 blas_int lwork_proposed = 0;
3234
3235 if((m*n) >= 1024)
3236 {
3237 eT work_query[2];
3238 blas_int lwork_query = -1; // query to find optimum size of workspace
3239
3240 arma_extra_debug_print("lapack::cx_gesvd()");
3241 lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info);
3242
3243 if(info != 0) { return false; }
3244
3245 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
3246 }
3247
3248 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3249
3250 podarray<eT> work( static_cast<uword>(lwork_final) );
3251
3252 arma_extra_debug_print("lapack::cx_gesvd()");
3253 lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info);
3254
3255 return (info == 0);
3256 }
3257 #else
3258 {
3259 arma_ignore(S);
3260 arma_ignore(A);
3261 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3262 return false;
3263 }
3264 #endif
3265 }
3266
3267
3268
3269 template<typename eT>
3270 inline
3271 bool
svd(Mat<eT> & U,Col<eT> & S,Mat<eT> & V,Mat<eT> & A)3272 auxlib::svd(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A)
3273 {
3274 arma_extra_debug_sigprint();
3275
3276 #if defined(ARMA_USE_LAPACK)
3277 {
3278 if(A.is_empty())
3279 {
3280 U.eye(A.n_rows, A.n_rows);
3281 S.reset();
3282 V.eye(A.n_cols, A.n_cols);
3283 return true;
3284 }
3285
3286 arma_debug_assert_blas_size(A);
3287
3288 U.set_size(A.n_rows, A.n_rows);
3289 V.set_size(A.n_cols, A.n_cols);
3290
3291 char jobu = 'A';
3292 char jobvt = 'A';
3293
3294 blas_int m = blas_int(A.n_rows);
3295 blas_int n = blas_int(A.n_cols);
3296 blas_int min_mn = (std::min)(m,n);
3297 blas_int lda = blas_int(A.n_rows);
3298 blas_int ldu = blas_int(U.n_rows);
3299 blas_int ldvt = blas_int(V.n_rows);
3300 blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
3301 blas_int info = 0;
3302
3303 S.set_size( static_cast<uword>(min_mn) );
3304
3305 blas_int lwork_proposed = 0;
3306
3307 if((m*n) >= 1024)
3308 {
3309 // query to find optimum size of workspace
3310 eT work_query[2];
3311 blas_int lwork_query = -1;
3312
3313 arma_extra_debug_print("lapack::gesvd()");
3314 lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info);
3315
3316 if(info != 0) { return false; }
3317
3318 lwork_proposed = static_cast<blas_int>( work_query[0] );
3319 }
3320
3321 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3322
3323 podarray<eT> work( static_cast<uword>(lwork_final) );
3324
3325 arma_extra_debug_print("lapack::gesvd()");
3326 lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info);
3327
3328 if(info != 0) { return false; }
3329
3330 op_strans::apply_mat_inplace(V);
3331
3332 return true;
3333 }
3334 #else
3335 {
3336 arma_ignore(U);
3337 arma_ignore(S);
3338 arma_ignore(V);
3339 arma_ignore(A);
3340 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3341 return false;
3342 }
3343 #endif
3344 }
3345
3346
3347
3348 template<typename T>
3349 inline
3350 bool
svd(Mat<std::complex<T>> & U,Col<T> & S,Mat<std::complex<T>> & V,Mat<std::complex<T>> & A)3351 auxlib::svd(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A)
3352 {
3353 arma_extra_debug_sigprint();
3354
3355 #if defined(ARMA_USE_LAPACK)
3356 {
3357 typedef std::complex<T> eT;
3358
3359 if(A.is_empty())
3360 {
3361 U.eye(A.n_rows, A.n_rows);
3362 S.reset();
3363 V.eye(A.n_cols, A.n_cols);
3364 return true;
3365 }
3366
3367 arma_debug_assert_blas_size(A);
3368
3369 U.set_size(A.n_rows, A.n_rows);
3370 V.set_size(A.n_cols, A.n_cols);
3371
3372 char jobu = 'A';
3373 char jobvt = 'A';
3374
3375 blas_int m = blas_int(A.n_rows);
3376 blas_int n = blas_int(A.n_cols);
3377 blas_int min_mn = (std::min)(m,n);
3378 blas_int lda = blas_int(A.n_rows);
3379 blas_int ldu = blas_int(U.n_rows);
3380 blas_int ldvt = blas_int(V.n_rows);
3381 blas_int lwork_min = (std::max)( blas_int(1), 2*min_mn + (std::max)(m,n) );
3382 blas_int info = 0;
3383
3384 S.set_size( static_cast<uword>(min_mn) );
3385
3386 podarray<T> rwork( static_cast<uword>(5*min_mn) );
3387
3388 blas_int lwork_proposed = 0;
3389
3390 if((m*n) >= 1024)
3391 {
3392 eT work_query[2];
3393 blas_int lwork_query = -1; // query to find optimum size of workspace
3394
3395 arma_extra_debug_print("lapack::cx_gesvd()");
3396 lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info);
3397
3398 if(info != 0) { return false; }
3399
3400 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
3401 }
3402
3403 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3404
3405 podarray<eT> work( static_cast<uword>(lwork_final) );
3406
3407 arma_extra_debug_print("lapack::cx_gesvd()");
3408 lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info);
3409
3410 if(info != 0) { return false; }
3411
3412 op_htrans::apply_mat_inplace(V);
3413
3414 return true;
3415 }
3416 #else
3417 {
3418 arma_ignore(U);
3419 arma_ignore(S);
3420 arma_ignore(V);
3421 arma_ignore(A);
3422 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3423 return false;
3424 }
3425 #endif
3426 }
3427
3428
3429
3430 template<typename eT>
3431 inline
3432 bool
svd_econ(Mat<eT> & U,Col<eT> & S,Mat<eT> & V,Mat<eT> & A,const char mode)3433 auxlib::svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A, const char mode)
3434 {
3435 arma_extra_debug_sigprint();
3436
3437 #if defined(ARMA_USE_LAPACK)
3438 {
3439 if(A.is_empty())
3440 {
3441 U.eye();
3442 S.reset();
3443 V.eye();
3444 return true;
3445 }
3446
3447 arma_debug_assert_blas_size(A);
3448
3449 blas_int m = blas_int(A.n_rows);
3450 blas_int n = blas_int(A.n_cols);
3451 blas_int min_mn = (std::min)(m,n);
3452 blas_int lda = blas_int(A.n_rows);
3453
3454 S.set_size( static_cast<uword>(min_mn) );
3455
3456 blas_int ldu = 0;
3457 blas_int ldvt = 0;
3458
3459 char jobu = char(0);
3460 char jobvt = char(0);
3461
3462 if(mode == 'l')
3463 {
3464 jobu = 'S';
3465 jobvt = 'N';
3466
3467 ldu = m;
3468 ldvt = 1;
3469
3470 U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) );
3471 V.reset();
3472 }
3473
3474 if(mode == 'r')
3475 {
3476 jobu = 'N';
3477 jobvt = 'S';
3478
3479 ldu = 1;
3480 ldvt = (std::min)(m,n);
3481
3482 U.reset();
3483 V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) );
3484 }
3485
3486 if(mode == 'b')
3487 {
3488 jobu = 'S';
3489 jobvt = 'S';
3490
3491 ldu = m;
3492 ldvt = (std::min)(m,n);
3493
3494 U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) );
3495 V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n ) );
3496 }
3497
3498
3499 blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
3500 blas_int info = 0;
3501
3502 blas_int lwork_proposed = 0;
3503
3504 if((m*n) >= 1024)
3505 {
3506 eT work_query[2];
3507 blas_int lwork_query = -1; // query to find optimum size of workspace
3508
3509 arma_extra_debug_print("lapack::gesvd()");
3510 lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info);
3511
3512 if(info != 0) { return false; }
3513
3514 lwork_proposed = static_cast<blas_int>(work_query[0]);
3515 }
3516
3517 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3518
3519 podarray<eT> work( static_cast<uword>(lwork_final) );
3520
3521 arma_extra_debug_print("lapack::gesvd()");
3522 lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info);
3523
3524 if(info != 0) { return false; }
3525
3526 op_strans::apply_mat_inplace(V);
3527
3528 return true;
3529 }
3530 #else
3531 {
3532 arma_ignore(U);
3533 arma_ignore(S);
3534 arma_ignore(V);
3535 arma_ignore(A);
3536 arma_ignore(mode);
3537 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3538 return false;
3539 }
3540 #endif
3541 }
3542
3543
3544
3545 template<typename T>
3546 inline
3547 bool
svd_econ(Mat<std::complex<T>> & U,Col<T> & S,Mat<std::complex<T>> & V,Mat<std::complex<T>> & A,const char mode)3548 auxlib::svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A, const char mode)
3549 {
3550 arma_extra_debug_sigprint();
3551
3552 #if defined(ARMA_USE_LAPACK)
3553 {
3554 typedef std::complex<T> eT;
3555
3556 if(A.is_empty())
3557 {
3558 U.eye();
3559 S.reset();
3560 V.eye();
3561 return true;
3562 }
3563
3564 arma_debug_assert_blas_size(A);
3565
3566 blas_int m = blas_int(A.n_rows);
3567 blas_int n = blas_int(A.n_cols);
3568 blas_int min_mn = (std::min)(m,n);
3569 blas_int lda = blas_int(A.n_rows);
3570
3571 S.set_size( static_cast<uword>(min_mn) );
3572
3573 blas_int ldu = 0;
3574 blas_int ldvt = 0;
3575
3576 char jobu = char(0);
3577 char jobvt = char(0);
3578
3579 if(mode == 'l')
3580 {
3581 jobu = 'S';
3582 jobvt = 'N';
3583
3584 ldu = m;
3585 ldvt = 1;
3586
3587 U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) );
3588 V.reset();
3589 }
3590
3591 if(mode == 'r')
3592 {
3593 jobu = 'N';
3594 jobvt = 'S';
3595
3596 ldu = 1;
3597 ldvt = (std::min)(m,n);
3598
3599 U.reset();
3600 V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) );
3601 }
3602
3603 if(mode == 'b')
3604 {
3605 jobu = 'S';
3606 jobvt = 'S';
3607
3608 ldu = m;
3609 ldvt = (std::min)(m,n);
3610
3611 U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) );
3612 V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) );
3613 }
3614
3615 blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) );
3616 blas_int info = 0;
3617
3618 podarray<T> rwork( static_cast<uword>(5*min_mn) );
3619
3620 blas_int lwork_proposed = 0;
3621
3622 if((m*n) >= 1024)
3623 {
3624 eT work_query[2];
3625 blas_int lwork_query = -1; // query to find optimum size of workspace
3626
3627 arma_extra_debug_print("lapack::cx_gesvd()");
3628 lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info);
3629
3630 if(info != 0) { return false; }
3631
3632 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
3633 }
3634
3635 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3636
3637 podarray<eT> work( static_cast<uword>(lwork_final) );
3638
3639 arma_extra_debug_print("lapack::cx_gesvd()");
3640 lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info);
3641
3642 if(info != 0) { return false; }
3643
3644 op_htrans::apply_mat_inplace(V);
3645
3646 return true;
3647 }
3648 #else
3649 {
3650 arma_ignore(U);
3651 arma_ignore(S);
3652 arma_ignore(V);
3653 arma_ignore(A);
3654 arma_ignore(mode);
3655 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3656 return false;
3657 }
3658 #endif
3659 }
3660
3661
3662
3663 template<typename eT>
3664 inline
3665 bool
svd_dc(Col<eT> & S,Mat<eT> & A)3666 auxlib::svd_dc(Col<eT>& S, Mat<eT>& A)
3667 {
3668 arma_extra_debug_sigprint();
3669
3670 #if defined(ARMA_USE_LAPACK)
3671 {
3672 if(A.is_empty()) { S.reset(); return true; }
3673
3674 arma_debug_assert_blas_size(A);
3675
3676 Mat<eT> U(1, 1, arma_nozeros_indicator());
3677 Mat<eT> V(1, 1, arma_nozeros_indicator());
3678
3679 char jobz = 'N';
3680
3681 blas_int m = blas_int(A.n_rows);
3682 blas_int n = blas_int(A.n_cols);
3683 blas_int min_mn = (std::min)(m,n);
3684 blas_int max_mn = (std::max)(m,n);
3685 blas_int lda = blas_int(A.n_rows);
3686 blas_int ldu = blas_int(U.n_rows);
3687 blas_int ldvt = blas_int(V.n_rows);
3688 blas_int lwork_min = 3*min_mn + (std::max)( max_mn, 7*min_mn );
3689 blas_int info = 0;
3690
3691 S.set_size( static_cast<uword>(min_mn) );
3692
3693 podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
3694
3695 blas_int lwork_proposed = 0;
3696
3697 if((m*n) >= 1024)
3698 {
3699 eT work_query[2];
3700 blas_int lwork_query = blas_int(-1);
3701
3702 arma_extra_debug_print("lapack::gesdd()");
3703 lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info);
3704
3705 if(info != 0) { return false; }
3706
3707 lwork_proposed = static_cast<blas_int>( work_query[0] );
3708 }
3709
3710 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3711
3712 podarray<eT> work( static_cast<uword>(lwork_final) );
3713
3714 arma_extra_debug_print("lapack::gesdd()");
3715 lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info);
3716
3717 return (info == 0);
3718 }
3719 #else
3720 {
3721 arma_ignore(S);
3722 arma_ignore(A);
3723 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3724 return false;
3725 }
3726 #endif
3727 }
3728
3729
3730
3731 template<typename T>
3732 inline
3733 bool
svd_dc(Col<T> & S,Mat<std::complex<T>> & A)3734 auxlib::svd_dc(Col<T>& S, Mat< std::complex<T> >& A)
3735 {
3736 arma_extra_debug_sigprint();
3737
3738 #if defined(ARMA_USE_LAPACK)
3739 {
3740 typedef std::complex<T> eT;
3741
3742 if(A.is_empty()) { S.reset(); return true; }
3743
3744 arma_debug_assert_blas_size(A);
3745
3746 Mat<eT> U(1, 1, arma_nozeros_indicator());
3747 Mat<eT> V(1, 1, arma_nozeros_indicator());
3748
3749 char jobz = 'N';
3750
3751 blas_int m = blas_int(A.n_rows);
3752 blas_int n = blas_int(A.n_cols);
3753 blas_int min_mn = (std::min)(m,n);
3754 blas_int max_mn = (std::max)(m,n);
3755 blas_int lda = blas_int(A.n_rows);
3756 blas_int ldu = blas_int(U.n_rows);
3757 blas_int ldvt = blas_int(V.n_rows);
3758 blas_int lwork_min = 2*min_mn + max_mn;
3759 blas_int info = 0;
3760
3761 S.set_size( static_cast<uword>(min_mn) );
3762
3763 podarray<T> rwork( static_cast<uword>(7*min_mn) ); // from LAPACK 3.8 docs: LAPACK <= v3.6 needs 7*mn
3764 podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
3765
3766 blas_int lwork_proposed = 0;
3767
3768 if((m*n) >= 1024)
3769 {
3770 eT work_query[2];
3771 blas_int lwork_query = blas_int(-1);
3772
3773 arma_extra_debug_print("lapack::cx_gesdd()");
3774 lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
3775
3776 if(info != 0) { return false; }
3777
3778 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
3779 }
3780
3781 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3782
3783 podarray<eT> work( static_cast<uword>(lwork_final) );
3784
3785 arma_extra_debug_print("lapack::cx_gesdd()");
3786 lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
3787
3788 return (info == 0);
3789 }
3790 #else
3791 {
3792 arma_ignore(S);
3793 arma_ignore(A);
3794 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3795 return false;
3796 }
3797 #endif
3798 }
3799
3800
3801
3802 template<typename eT>
3803 inline
3804 bool
svd_dc(Mat<eT> & U,Col<eT> & S,Mat<eT> & V,Mat<eT> & A)3805 auxlib::svd_dc(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A)
3806 {
3807 arma_extra_debug_sigprint();
3808
3809 #if defined(ARMA_USE_LAPACK)
3810 {
3811 if(A.is_empty())
3812 {
3813 U.eye(A.n_rows, A.n_rows);
3814 S.reset();
3815 V.eye(A.n_cols, A.n_cols);
3816 return true;
3817 }
3818
3819 arma_debug_assert_blas_size(A);
3820
3821 U.set_size(A.n_rows, A.n_rows);
3822 V.set_size(A.n_cols, A.n_cols);
3823
3824 char jobz = 'A';
3825
3826 blas_int m = blas_int(A.n_rows);
3827 blas_int n = blas_int(A.n_cols);
3828 blas_int min_mn = (std::min)(m,n);
3829 blas_int max_mn = (std::max)(m,n);
3830 blas_int lda = blas_int(A.n_rows);
3831 blas_int ldu = blas_int(U.n_rows);
3832 blas_int ldvt = blas_int(V.n_rows);
3833 blas_int lwork1 = 3*min_mn*min_mn + (std::max)(max_mn, 4*min_mn*min_mn + 4*min_mn); // as per LAPACK 3.2 docs
3834 blas_int lwork2 = 4*min_mn*min_mn + 6*min_mn + max_mn; // as per LAPACK 3.8 docs; consistent with LAPACK 3.4 docs
3835 blas_int lwork_min = (std::max)(lwork1, lwork2); // due to differences between LAPACK 3.2 and 3.8
3836 blas_int info = 0;
3837
3838 S.set_size( static_cast<uword>(min_mn) );
3839
3840 podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
3841
3842 blas_int lwork_proposed = 0;
3843
3844 if((m*n) >= 1024)
3845 {
3846 eT work_query[2];
3847 blas_int lwork_query = blas_int(-1);
3848
3849 arma_extra_debug_print("lapack::gesdd()");
3850 lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info);
3851
3852 if(info != 0) { return false; }
3853
3854 lwork_proposed = static_cast<blas_int>(work_query[0]);
3855 }
3856
3857 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3858
3859 podarray<eT> work( static_cast<uword>(lwork_final) );
3860
3861 arma_extra_debug_print("lapack::gesdd()");
3862 lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info);
3863
3864 if(info != 0) { return false; }
3865
3866 op_strans::apply_mat_inplace(V);
3867
3868 return true;
3869 }
3870 #else
3871 {
3872 arma_ignore(U);
3873 arma_ignore(S);
3874 arma_ignore(V);
3875 arma_ignore(A);
3876 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3877 return false;
3878 }
3879 #endif
3880 }
3881
3882
3883
3884 template<typename T>
3885 inline
3886 bool
svd_dc(Mat<std::complex<T>> & U,Col<T> & S,Mat<std::complex<T>> & V,Mat<std::complex<T>> & A)3887 auxlib::svd_dc(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A)
3888 {
3889 arma_extra_debug_sigprint();
3890
3891 #if defined(ARMA_USE_LAPACK)
3892 {
3893 typedef std::complex<T> eT;
3894
3895 if(A.is_empty())
3896 {
3897 U.eye(A.n_rows, A.n_rows);
3898 S.reset();
3899 V.eye(A.n_cols, A.n_cols);
3900 return true;
3901 }
3902
3903 arma_debug_assert_blas_size(A);
3904
3905 U.set_size(A.n_rows, A.n_rows);
3906 V.set_size(A.n_cols, A.n_cols);
3907
3908 char jobz = 'A';
3909
3910 blas_int m = blas_int(A.n_rows);
3911 blas_int n = blas_int(A.n_cols);
3912 blas_int min_mn = (std::min)(m,n);
3913 blas_int max_mn = (std::max)(m,n);
3914 blas_int lda = blas_int(A.n_rows);
3915 blas_int ldu = blas_int(U.n_rows);
3916 blas_int ldvt = blas_int(V.n_rows);
3917 blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn; // as per LAPACK 3.2, 3.4, 3.8 docs
3918 blas_int lrwork = min_mn * ((std::max)(5*min_mn+7, 2*max_mn + 2*min_mn+1)); // as per LAPACK 3.4 docs; LAPACK 3.8 uses 5*min_mn+5 instead of 5*min_mn+7
3919 blas_int info = 0;
3920
3921 S.set_size( static_cast<uword>(min_mn) );
3922
3923 podarray<T> rwork( static_cast<uword>(lrwork ) );
3924 podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
3925
3926 blas_int lwork_proposed = 0;
3927
3928 if((m*n) >= 1024)
3929 {
3930 eT work_query[2];
3931 blas_int lwork_query = blas_int(-1);
3932
3933 arma_extra_debug_print("lapack::cx_gesdd()");
3934 lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
3935
3936 if(info != 0) { return false; }
3937
3938 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
3939 }
3940
3941 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
3942
3943 podarray<eT> work( static_cast<uword>(lwork_final) );
3944
3945 arma_extra_debug_print("lapack::cx_gesdd()");
3946 lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
3947
3948 if(info != 0) { return false; }
3949
3950 op_htrans::apply_mat_inplace(V);
3951
3952 return true;
3953 }
3954 #else
3955 {
3956 arma_ignore(U);
3957 arma_ignore(S);
3958 arma_ignore(V);
3959 arma_ignore(A);
3960 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
3961 return false;
3962 }
3963 #endif
3964 }
3965
3966
3967
3968 template<typename eT>
3969 inline
3970 bool
svd_dc_econ(Mat<eT> & U,Col<eT> & S,Mat<eT> & V,Mat<eT> & A)3971 auxlib::svd_dc_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A)
3972 {
3973 arma_extra_debug_sigprint();
3974
3975 #if defined(ARMA_USE_LAPACK)
3976 {
3977 arma_debug_assert_blas_size(A);
3978
3979 char jobz = 'S';
3980
3981 blas_int m = blas_int(A.n_rows);
3982 blas_int n = blas_int(A.n_cols);
3983 blas_int min_mn = (std::min)(m,n);
3984 blas_int max_mn = (std::max)(m,n);
3985 blas_int lda = blas_int(A.n_rows);
3986 blas_int ldu = m;
3987 blas_int ldvt = min_mn;
3988 blas_int lwork1 = 3*min_mn*min_mn + (std::max)( max_mn, 4*min_mn*min_mn + 4*min_mn ); // as per LAPACK 3.2 docs
3989 blas_int lwork2 = 4*min_mn*min_mn + 6*min_mn + max_mn; // as per LAPACK 3.4 docs; LAPACK 3.8 requires 4*min_mn*min_mn + 7*min_mn
3990 blas_int lwork_min = (std::max)(lwork1, lwork2); // due to differences between LAPACK 3.2 and 3.4
3991 blas_int info = 0;
3992
3993 if(A.is_empty())
3994 {
3995 U.eye();
3996 S.reset();
3997 V.eye( static_cast<uword>(n), static_cast<uword>(min_mn) );
3998 return true;
3999 }
4000
4001 S.set_size( static_cast<uword>(min_mn) );
4002
4003 U.set_size( static_cast<uword>(m), static_cast<uword>(min_mn) );
4004
4005 V.set_size( static_cast<uword>(min_mn), static_cast<uword>(n) );
4006
4007 podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
4008
4009 blas_int lwork_proposed = 0;
4010
4011 if((m*n) >= 1024)
4012 {
4013 eT work_query[2];
4014 blas_int lwork_query = blas_int(-1);
4015
4016 arma_extra_debug_print("lapack::gesdd()");
4017 lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info);
4018
4019 if(info != 0) { return false; }
4020
4021 lwork_proposed = static_cast<blas_int>(work_query[0]);
4022 }
4023
4024 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
4025
4026 podarray<eT> work( static_cast<uword>(lwork_final) );
4027
4028 arma_extra_debug_print("lapack::gesdd()");
4029 lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info);
4030
4031 if(info != 0) { return false; }
4032
4033 op_strans::apply_mat_inplace(V);
4034
4035 return true;
4036 }
4037 #else
4038 {
4039 arma_ignore(U);
4040 arma_ignore(S);
4041 arma_ignore(V);
4042 arma_ignore(A);
4043 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
4044 return false;
4045 }
4046 #endif
4047 }
4048
4049
4050
4051 template<typename T>
4052 inline
4053 bool
svd_dc_econ(Mat<std::complex<T>> & U,Col<T> & S,Mat<std::complex<T>> & V,Mat<std::complex<T>> & A)4054 auxlib::svd_dc_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A)
4055 {
4056 arma_extra_debug_sigprint();
4057
4058 #if defined(ARMA_USE_LAPACK)
4059 {
4060 typedef std::complex<T> eT;
4061
4062 arma_debug_assert_blas_size(A);
4063
4064 char jobz = 'S';
4065
4066 blas_int m = blas_int(A.n_rows);
4067 blas_int n = blas_int(A.n_cols);
4068 blas_int min_mn = (std::min)(m,n);
4069 blas_int max_mn = (std::max)(m,n);
4070 blas_int lda = blas_int(A.n_rows);
4071 blas_int ldu = m;
4072 blas_int ldvt = min_mn;
4073 blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn; // as per LAPACK 3.2 docs
4074 blas_int lrwork = min_mn * ((std::max)(5*min_mn+7, 2*max_mn + 2*min_mn+1)); // LAPACK 3.8 uses 5*min_mn+5 instead of 5*min_mn+7
4075 blas_int info = 0;
4076
4077 if(A.is_empty())
4078 {
4079 U.eye();
4080 S.reset();
4081 V.eye( static_cast<uword>(n), static_cast<uword>(min_mn) );
4082 return true;
4083 }
4084
4085 S.set_size( static_cast<uword>(min_mn) );
4086
4087 U.set_size( static_cast<uword>(m), static_cast<uword>(min_mn) );
4088
4089 V.set_size( static_cast<uword>(min_mn), static_cast<uword>(n) );
4090
4091 podarray<T> rwork( static_cast<uword>(lrwork ) );
4092 podarray<blas_int> iwork( static_cast<uword>(8*min_mn) );
4093
4094 blas_int lwork_proposed = 0;
4095
4096 if((m*n) >= 1024)
4097 {
4098 eT work_query[2];
4099 blas_int lwork_query = blas_int(-1);
4100
4101 arma_extra_debug_print("lapack::cx_gesdd()");
4102 lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
4103
4104 if(info != 0) { return false; }
4105
4106 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
4107 }
4108
4109 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
4110
4111 podarray<eT> work( static_cast<uword>(lwork_final) );
4112
4113 arma_extra_debug_print("lapack::cx_gesdd()");
4114 lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
4115
4116 if(info != 0) { return false; }
4117
4118 op_htrans::apply_mat_inplace(V);
4119
4120 return true;
4121 }
4122 #else
4123 {
4124 arma_ignore(U);
4125 arma_ignore(S);
4126 arma_ignore(V);
4127 arma_ignore(A);
4128 arma_stop_logic_error("svd(): use of LAPACK must be enabled");
4129 return false;
4130 }
4131 #endif
4132 }
4133
4134
4135
4136 //! solve a system of linear equations via explicit inverse (tiny matrices)
4137 template<typename T1>
4138 arma_cold
4139 inline
4140 bool
solve_square_tiny(Mat<typename T1::elem_type> & out,const Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr)4141 auxlib::solve_square_tiny(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
4142 {
4143 arma_extra_debug_sigprint();
4144
4145 // NOTE: assuming A has size <= 4x4
4146
4147 typedef typename T1::elem_type eT;
4148
4149 const uword A_n_rows = A.n_rows;
4150
4151 Mat<eT> A_inv(A_n_rows, A_n_rows, arma_nozeros_indicator());
4152
4153 const bool status = op_inv::apply_tiny_noalias(A_inv, A);
4154
4155 if(status == false) { return false; }
4156
4157 const quasi_unwrap<T1> UB(B_expr.get_ref());
4158 const Mat<eT>& B = UB.M;
4159
4160 const uword B_n_rows = B.n_rows;
4161 const uword B_n_cols = B.n_cols;
4162
4163 arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
4164
4165 if(A.is_empty() || B.is_empty())
4166 {
4167 out.zeros(A.n_cols, B_n_cols);
4168 return true;
4169 }
4170
4171 if(UB.is_alias(out))
4172 {
4173 Mat<eT> tmp(A_n_rows, B_n_cols, arma_nozeros_indicator());
4174
4175 gemm_emul<false,false,false,false>::apply(tmp, A_inv, B);
4176
4177 out.steal_mem(tmp);
4178 }
4179 else
4180 {
4181 out.set_size(A_n_rows, B_n_cols);
4182
4183 gemm_emul<false,false,false,false>::apply(out, A_inv, B);
4184 }
4185
4186 return true;
4187 }
4188
4189
4190
4191 //! solve a system of linear equations via LU decomposition
4192 template<typename T1>
4193 inline
4194 bool
solve_square_fast(Mat<typename T1::elem_type> & out,Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr)4195 auxlib::solve_square_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
4196 {
4197 arma_extra_debug_sigprint();
4198
4199 typedef typename T1::elem_type eT;
4200
4201 const uword A_n_rows = A.n_rows;
4202
4203 if((A_n_rows <= 4) && is_cx<eT>::no)
4204 {
4205 const bool status = auxlib::solve_square_tiny(out, A, B_expr.get_ref());
4206
4207 if(status) { return true; }
4208 }
4209
4210 out = B_expr.get_ref();
4211
4212 const uword B_n_rows = out.n_rows;
4213 const uword B_n_cols = out.n_cols;
4214
4215 arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
4216
4217 if(A.is_empty() || out.is_empty())
4218 {
4219 out.zeros(A.n_cols, B_n_cols);
4220 return true;
4221 }
4222
4223 #if defined(ARMA_USE_ATLAS)
4224 {
4225 arma_debug_assert_atlas_size(A);
4226
4227 podarray<int> ipiv(A_n_rows + 2); // +2 for paranoia: old versions of Atlas might be trashing memory
4228
4229 arma_extra_debug_print("atlas::clapack_gesv()");
4230 int info = atlas::clapack_gesv<eT>(atlas::CblasColMajor, A_n_rows, B_n_cols, A.memptr(), A_n_rows, ipiv.memptr(), out.memptr(), A_n_rows);
4231
4232 return (info == 0);
4233 }
4234 #elif defined(ARMA_USE_LAPACK)
4235 {
4236 arma_debug_assert_blas_size(A);
4237
4238 blas_int n = blas_int(A_n_rows); // assuming A is square
4239 blas_int lda = blas_int(A_n_rows);
4240 blas_int ldb = blas_int(B_n_rows);
4241 blas_int nrhs = blas_int(B_n_cols);
4242 blas_int info = blas_int(0);
4243
4244 podarray<blas_int> ipiv(A_n_rows + 2); // +2 for paranoia: some versions of Lapack might be trashing memory
4245
4246 arma_extra_debug_print("lapack::gesv()");
4247 lapack::gesv<eT>(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
4248
4249 return (info == 0);
4250 }
4251 #else
4252 {
4253 arma_stop_logic_error("solve(): use of ATLAS or LAPACK must be enabled");
4254 return false;
4255 }
4256 #endif
4257 }
4258
4259
4260
4261 //! solve a system of linear equations via LU decomposition with rcond estimate
4262 template<typename T1>
4263 inline
4264 bool
solve_square_rcond(Mat<typename T1::elem_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr,const bool allow_ugly)4265 auxlib::solve_square_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const bool allow_ugly)
4266 {
4267 arma_extra_debug_sigprint();
4268
4269 #if defined(ARMA_USE_LAPACK)
4270 {
4271 typedef typename T1::elem_type eT;
4272 typedef typename T1::pod_type T;
4273
4274 out_rcond = T(0);
4275
4276 out = B_expr.get_ref();
4277
4278 const uword B_n_rows = out.n_rows;
4279 const uword B_n_cols = out.n_cols;
4280
4281 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
4282
4283 if(A.is_empty() || out.is_empty())
4284 {
4285 out.zeros(A.n_cols, B_n_cols);
4286 return true;
4287 }
4288
4289 arma_debug_assert_blas_size(A);
4290
4291 char norm_id = '1';
4292 char trans = 'N';
4293 blas_int n = blas_int(A.n_rows); // assuming A is square
4294 blas_int lda = blas_int(A.n_rows);
4295 blas_int ldb = blas_int(B_n_rows);
4296 blas_int nrhs = blas_int(B_n_cols);
4297 blas_int info = blas_int(0);
4298 T norm_val = T(0);
4299
4300 podarray<T> junk(1);
4301 podarray<blas_int> ipiv(A.n_rows + 2); // +2 for paranoia
4302
4303 arma_extra_debug_print("lapack::lange()");
4304 norm_val = lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr());
4305
4306 arma_extra_debug_print("lapack::getrf()");
4307 lapack::getrf<eT>(&n, &n, A.memptr(), &n, ipiv.memptr(), &info);
4308
4309 if(info != blas_int(0)) { return false; }
4310
4311 arma_extra_debug_print("lapack::getrs()");
4312 lapack::getrs<eT>(&trans, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
4313
4314 if(info != blas_int(0)) { return false; }
4315
4316 out_rcond = auxlib::lu_rcond<T>(A, norm_val);
4317
4318 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; }
4319
4320 return true;
4321 }
4322 #else
4323 {
4324 arma_ignore(out);
4325 arma_ignore(out_rcond);
4326 arma_ignore(A);
4327 arma_ignore(B_expr);
4328 arma_ignore(allow_ugly);
4329 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4330 return false;
4331 }
4332 #endif
4333 }
4334
4335
4336
4337 //! solve a system of linear equations via LU decomposition with refinement (real matrices)
4338 template<typename T1>
4339 inline
4340 bool
solve_square_refine(Mat<typename T1::pod_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::pod_type> & A,const Base<typename T1::pod_type,T1> & B_expr,const bool equilibrate,const bool allow_ugly)4341 auxlib::solve_square_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate, const bool allow_ugly)
4342 {
4343 arma_extra_debug_sigprint();
4344
4345 #if defined(ARMA_USE_LAPACK)
4346 {
4347 typedef typename T1::pod_type eT;
4348
4349 // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::gesvx() if equilibrate is enabled
4350
4351 quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const
4352
4353 const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below
4354
4355 const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
4356
4357 Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; }
4358
4359 const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
4360
4361 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
4362
4363 if(A.is_empty() || B.is_empty())
4364 {
4365 out.zeros(A.n_rows, B.n_cols);
4366 return true;
4367 }
4368
4369 arma_debug_assert_blas_size(A,B);
4370
4371 out.set_size(A.n_rows, B.n_cols);
4372
4373 char fact = (equilibrate) ? 'E' : 'N';
4374 char trans = 'N';
4375 char equed = char(0);
4376 blas_int n = blas_int(A.n_rows);
4377 blas_int nrhs = blas_int(B.n_cols);
4378 blas_int lda = blas_int(A.n_rows);
4379 blas_int ldaf = blas_int(A.n_rows);
4380 blas_int ldb = blas_int(A.n_rows);
4381 blas_int ldx = blas_int(A.n_rows);
4382 blas_int info = blas_int(0);
4383 eT rcond = eT(0);
4384
4385 Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
4386
4387 podarray<blas_int> IPIV( A.n_rows);
4388 podarray<eT> R( A.n_rows);
4389 podarray<eT> C( A.n_rows);
4390 podarray<eT> FERR( B.n_cols);
4391 podarray<eT> BERR( B.n_cols);
4392 podarray<eT> WORK(4*A.n_rows);
4393 podarray<blas_int> IWORK( A.n_rows);
4394
4395 arma_extra_debug_print("lapack::gesvx()");
4396 lapack::gesvx
4397 (
4398 &fact, &trans, &n, &nrhs,
4399 A.memptr(), &lda,
4400 AF.memptr(), &ldaf,
4401 IPIV.memptr(),
4402 &equed,
4403 R.memptr(),
4404 C.memptr(),
4405 const_cast<eT*>(B.memptr()), &ldb,
4406 out.memptr(), &ldx,
4407 &rcond,
4408 FERR.memptr(),
4409 BERR.memptr(),
4410 WORK.memptr(),
4411 IWORK.memptr(),
4412 &info
4413 );
4414
4415 // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
4416 // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
4417
4418 out_rcond = rcond;
4419
4420 return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0);
4421 }
4422 #else
4423 {
4424 arma_ignore(out);
4425 arma_ignore(out_rcond);
4426 arma_ignore(A);
4427 arma_ignore(B_expr);
4428 arma_ignore(equilibrate);
4429 arma_ignore(allow_ugly);
4430 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4431 return false;
4432 }
4433 #endif
4434 }
4435
4436
4437
4438 //! solve a system of linear equations via LU decomposition with refinement (complex matrices)
4439 template<typename T1>
4440 inline
4441 bool
solve_square_refine(Mat<std::complex<typename T1::pod_type>> & out,typename T1::pod_type & out_rcond,Mat<std::complex<typename T1::pod_type>> & A,const Base<std::complex<typename T1::pod_type>,T1> & B_expr,const bool equilibrate,const bool allow_ugly)4442 auxlib::solve_square_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate, const bool allow_ugly)
4443 {
4444 arma_extra_debug_sigprint();
4445
4446 #if defined(ARMA_USE_LAPACK)
4447 {
4448 typedef typename T1::pod_type T;
4449 typedef typename std::complex<T> eT;
4450
4451 // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::cx_gesvx() if equilibrate is enabled
4452
4453 quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const
4454
4455 const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below
4456
4457 const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
4458
4459 Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; }
4460
4461 const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
4462
4463 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
4464
4465 if(A.is_empty() || B.is_empty())
4466 {
4467 out.zeros(A.n_rows, B.n_cols);
4468 return true;
4469 }
4470
4471 arma_debug_assert_blas_size(A,B);
4472
4473 out.set_size(A.n_rows, B.n_cols);
4474
4475 char fact = (equilibrate) ? 'E' : 'N';
4476 char trans = 'N';
4477 char equed = char(0);
4478 blas_int n = blas_int(A.n_rows);
4479 blas_int nrhs = blas_int(B.n_cols);
4480 blas_int lda = blas_int(A.n_rows);
4481 blas_int ldaf = blas_int(A.n_rows);
4482 blas_int ldb = blas_int(A.n_rows);
4483 blas_int ldx = blas_int(A.n_rows);
4484 blas_int info = blas_int(0);
4485 T rcond = T(0);
4486
4487 Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
4488
4489 podarray<blas_int> IPIV( A.n_rows);
4490 podarray< T> R( A.n_rows);
4491 podarray< T> C( A.n_rows);
4492 podarray< T> FERR( B.n_cols);
4493 podarray< T> BERR( B.n_cols);
4494 podarray<eT> WORK(2*A.n_rows);
4495 podarray< T> RWORK(2*A.n_rows);
4496
4497 arma_extra_debug_print("lapack::cx_gesvx()");
4498 lapack::cx_gesvx
4499 (
4500 &fact, &trans, &n, &nrhs,
4501 A.memptr(), &lda,
4502 AF.memptr(), &ldaf,
4503 IPIV.memptr(),
4504 &equed,
4505 R.memptr(),
4506 C.memptr(),
4507 const_cast<eT*>(B.memptr()), &ldb,
4508 out.memptr(), &ldx,
4509 &rcond,
4510 FERR.memptr(),
4511 BERR.memptr(),
4512 WORK.memptr(),
4513 RWORK.memptr(),
4514 &info
4515 );
4516
4517 // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
4518 // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
4519
4520 out_rcond = rcond;
4521
4522 return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0);
4523 }
4524 #else
4525 {
4526 arma_ignore(out);
4527 arma_ignore(out_rcond);
4528 arma_ignore(A);
4529 arma_ignore(B_expr);
4530 arma_ignore(equilibrate);
4531 arma_ignore(allow_ugly);
4532 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4533 return false;
4534 }
4535 #endif
4536 }
4537
4538
4539
4540 template<typename T1>
4541 inline
4542 bool
solve_sympd_fast(Mat<typename T1::elem_type> & out,Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr)4543 auxlib::solve_sympd_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
4544 {
4545 arma_extra_debug_sigprint();
4546
4547 #if defined(ARMA_CRIPPLED_LAPACK)
4548 {
4549 arma_extra_debug_print("auxlib::solve_sympd_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK");
4550
4551 return auxlib::solve_square_fast(out, A, B_expr);
4552 }
4553 #else
4554 {
4555 return auxlib::solve_sympd_fast_common(out, A, B_expr);
4556 }
4557 #endif
4558 }
4559
4560
4561
4562 template<typename T1>
4563 inline
4564 bool
solve_sympd_fast_common(Mat<typename T1::elem_type> & out,Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr)4565 auxlib::solve_sympd_fast_common(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
4566 {
4567 arma_extra_debug_sigprint();
4568
4569 typedef typename T1::elem_type eT;
4570
4571 const uword A_n_rows = A.n_rows;
4572
4573 if((A_n_rows <= 4) && is_cx<eT>::no)
4574 {
4575 const bool status = auxlib::solve_square_tiny(out, A, B_expr.get_ref());
4576
4577 if(status) { return true; }
4578 }
4579
4580 out = B_expr.get_ref();
4581
4582 const uword B_n_rows = out.n_rows;
4583 const uword B_n_cols = out.n_cols;
4584
4585 arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
4586
4587 if(A.is_empty() || out.is_empty())
4588 {
4589 out.zeros(A.n_cols, B_n_cols);
4590 return true;
4591 }
4592
4593 #if defined(ARMA_USE_ATLAS)
4594 {
4595 arma_debug_assert_atlas_size(A, out);
4596
4597 int info = 0;
4598
4599 arma_extra_debug_print("atlas::clapack_posv()");
4600 info = atlas::clapack_posv<eT>(atlas::CblasColMajor, atlas::CblasLower, A_n_rows, B_n_cols, A.memptr(), A_n_rows, out.memptr(), B_n_rows);
4601
4602 return (info == 0);
4603 }
4604 #elif defined(ARMA_USE_LAPACK)
4605 {
4606 arma_debug_assert_blas_size(A, out);
4607
4608 char uplo = 'L';
4609 blas_int n = blas_int(A_n_rows); // assuming A is square
4610 blas_int nrhs = blas_int(B_n_cols);
4611 blas_int lda = blas_int(A_n_rows);
4612 blas_int ldb = blas_int(B_n_rows);
4613 blas_int info = blas_int(0);
4614
4615 arma_extra_debug_print("lapack::posv()");
4616 lapack::posv<eT>(&uplo, &n, &nrhs, A.memptr(), &lda, out.memptr(), &ldb, &info);
4617
4618 return (info == 0);
4619 }
4620 #else
4621 {
4622 arma_ignore(out);
4623 arma_ignore(A);
4624 arma_ignore(B_expr);
4625 arma_stop_logic_error("solve(): use of ATLAS or LAPACK must be enabled");
4626 return false;
4627 }
4628 #endif
4629 }
4630
4631
4632
4633 //! solve a system of linear equations via Cholesky decomposition with rcond estimate (real matrices)
4634 template<typename T1>
4635 inline
4636 bool
solve_sympd_rcond(Mat<typename T1::pod_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::pod_type> & A,const Base<typename T1::pod_type,T1> & B_expr,const bool allow_ugly)4637 auxlib::solve_sympd_rcond(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool allow_ugly)
4638 {
4639 arma_extra_debug_sigprint();
4640
4641 #if defined(ARMA_USE_LAPACK)
4642 {
4643 typedef typename T1::elem_type eT;
4644 typedef typename T1::pod_type T;
4645
4646 out_rcond = T(0);
4647
4648 out = B_expr.get_ref();
4649
4650 const uword B_n_rows = out.n_rows;
4651 const uword B_n_cols = out.n_cols;
4652
4653 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
4654
4655 if(A.is_empty() || out.is_empty())
4656 {
4657 out.zeros(A.n_cols, B_n_cols);
4658 return true;
4659 }
4660
4661 arma_debug_assert_blas_size(A, out);
4662
4663 char norm_id = '1';
4664 char uplo = 'L';
4665 blas_int n = blas_int(A.n_rows); // assuming A is square
4666 blas_int nrhs = blas_int(B_n_cols);
4667 blas_int info = blas_int(0);
4668 T norm_val = T(0);
4669
4670 podarray<T> work(A.n_rows);
4671
4672 arma_extra_debug_print("lapack::lansy()");
4673 norm_val = lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
4674
4675 arma_extra_debug_print("lapack::potrf()");
4676 lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info);
4677
4678 if(info != 0) { return false; }
4679
4680 arma_extra_debug_print("lapack::potrs()");
4681 lapack::potrs<eT>(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
4682
4683 if(info != 0) { return false; }
4684
4685 out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
4686
4687 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; }
4688
4689 return true;
4690 }
4691 #else
4692 {
4693 arma_ignore(out);
4694 arma_ignore(out_rcond);
4695 arma_ignore(A);
4696 arma_ignore(B_expr);
4697 arma_ignore(allow_ugly);
4698 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4699 return false;
4700 }
4701 #endif
4702 }
4703
4704
4705
4706 //! solve a system of linear equations via Cholesky decomposition with rcond estimate (complex matrices)
4707 template<typename T1>
4708 inline
4709 bool
solve_sympd_rcond(Mat<std::complex<typename T1::pod_type>> & out,typename T1::pod_type & out_rcond,Mat<std::complex<typename T1::pod_type>> & A,const Base<std::complex<typename T1::pod_type>,T1> & B_expr,const bool allow_ugly)4710 auxlib::solve_sympd_rcond(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>,T1>& B_expr, const bool allow_ugly)
4711 {
4712 arma_extra_debug_sigprint();
4713
4714 #if defined(ARMA_CRIPPLED_LAPACK)
4715 {
4716 arma_extra_debug_print("auxlib::solve_sympd_rcond(): redirecting to auxlib::solve_square_rcond() due to crippled LAPACK");
4717
4718 return auxlib::solve_square_rcond(out, out_rcond, A, B_expr, allow_ugly);
4719 }
4720 #elif defined(ARMA_USE_LAPACK)
4721 {
4722 typedef typename T1::pod_type T;
4723 typedef typename std::complex<T> eT;
4724
4725 out_rcond = T(0);
4726
4727 out = B_expr.get_ref();
4728
4729 const uword B_n_rows = out.n_rows;
4730 const uword B_n_cols = out.n_cols;
4731
4732 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
4733
4734 if(A.is_empty() || out.is_empty())
4735 {
4736 out.zeros(A.n_cols, B_n_cols);
4737 return true;
4738 }
4739
4740 arma_debug_assert_blas_size(A, out);
4741
4742 char norm_id = '1';
4743 char uplo = 'L';
4744 blas_int n = blas_int(A.n_rows); // assuming A is square
4745 blas_int nrhs = blas_int(B_n_cols);
4746 blas_int info = blas_int(0);
4747 T norm_val = T(0);
4748
4749 podarray<T> work(A.n_rows);
4750
4751 arma_extra_debug_print("lapack::lanhe()");
4752 norm_val = lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr());
4753
4754 arma_extra_debug_print("lapack::potrf()");
4755 lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info);
4756
4757 if(info != 0) { return false; }
4758
4759 arma_extra_debug_print("lapack::potrs()");
4760 lapack::potrs<eT>(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
4761
4762 if(info != 0) { return false; }
4763
4764 out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val);
4765
4766 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; }
4767
4768 return true;
4769 }
4770 #else
4771 {
4772 arma_ignore(out);
4773 arma_ignore(out_rcond);
4774 arma_ignore(A);
4775 arma_ignore(B_expr);
4776 arma_ignore(allow_ugly);
4777 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4778 return false;
4779 }
4780 #endif
4781 }
4782
4783
4784
4785 //! solve a system of linear equations via Cholesky decomposition with refinement (real matrices)
4786 template<typename T1>
4787 inline
4788 bool
solve_sympd_refine(Mat<typename T1::pod_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::pod_type> & A,const Base<typename T1::pod_type,T1> & B_expr,const bool equilibrate,const bool allow_ugly)4789 auxlib::solve_sympd_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate, const bool allow_ugly)
4790 {
4791 arma_extra_debug_sigprint();
4792
4793 #if defined(ARMA_USE_LAPACK)
4794 {
4795 typedef typename T1::pod_type eT;
4796
4797 // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::posvx() if equilibrate is enabled
4798
4799 quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const
4800
4801 const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below
4802
4803 const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
4804
4805 Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; }
4806
4807 const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
4808
4809 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
4810
4811 if(A.is_empty() || B.is_empty())
4812 {
4813 out.zeros(A.n_rows, B.n_cols);
4814 return true;
4815 }
4816
4817 arma_debug_assert_blas_size(A,B);
4818
4819 out.set_size(A.n_rows, B.n_cols);
4820
4821 char fact = (equilibrate) ? 'E' : 'N';
4822 char uplo = 'L';
4823 char equed = char(0);
4824 blas_int n = blas_int(A.n_rows);
4825 blas_int nrhs = blas_int(B.n_cols);
4826 blas_int lda = blas_int(A.n_rows);
4827 blas_int ldaf = blas_int(A.n_rows);
4828 blas_int ldb = blas_int(A.n_rows);
4829 blas_int ldx = blas_int(A.n_rows);
4830 blas_int info = blas_int(0);
4831 eT rcond = eT(0);
4832
4833 Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
4834
4835 podarray<eT> S( A.n_rows);
4836 podarray<eT> FERR( B.n_cols);
4837 podarray<eT> BERR( B.n_cols);
4838 podarray<eT> WORK(3*A.n_rows);
4839 podarray<blas_int> IWORK( A.n_rows);
4840
4841 arma_extra_debug_print("lapack::posvx()");
4842 lapack::posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast<eT*>(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), IWORK.memptr(), &info);
4843
4844 // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
4845 // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
4846
4847 out_rcond = rcond;
4848
4849 return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0);
4850 }
4851 #else
4852 {
4853 arma_ignore(out);
4854 arma_ignore(out_rcond);
4855 arma_ignore(A);
4856 arma_ignore(B_expr);
4857 arma_ignore(equilibrate);
4858 arma_ignore(allow_ugly);
4859 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4860 return false;
4861 }
4862 #endif
4863 }
4864
4865
4866
4867 //! solve a system of linear equations via Cholesky decomposition with refinement (complex matrices)
4868 template<typename T1>
4869 inline
4870 bool
solve_sympd_refine(Mat<std::complex<typename T1::pod_type>> & out,typename T1::pod_type & out_rcond,Mat<std::complex<typename T1::pod_type>> & A,const Base<std::complex<typename T1::pod_type>,T1> & B_expr,const bool equilibrate,const bool allow_ugly)4871 auxlib::solve_sympd_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate, const bool allow_ugly)
4872 {
4873 arma_extra_debug_sigprint();
4874
4875 #if defined(ARMA_CRIPPLED_LAPACK)
4876 {
4877 arma_extra_debug_print("auxlib::solve_sympd_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK");
4878
4879 return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate, allow_ugly);
4880 }
4881 #elif defined(ARMA_USE_LAPACK)
4882 {
4883 typedef typename T1::pod_type T;
4884 typedef typename std::complex<T> eT;
4885
4886 // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::cx_posvx() if equilibrate is enabled
4887
4888 quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const
4889
4890 const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below
4891
4892 const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out));
4893
4894 Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; }
4895
4896 const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat;
4897
4898 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
4899
4900 if(A.is_empty() || B.is_empty())
4901 {
4902 out.zeros(A.n_rows, B.n_cols);
4903 return true;
4904 }
4905
4906 arma_debug_assert_blas_size(A,B);
4907
4908 out.set_size(A.n_rows, B.n_cols);
4909
4910 char fact = (equilibrate) ? 'E' : 'N';
4911 char uplo = 'L';
4912 char equed = char(0);
4913 blas_int n = blas_int(A.n_rows);
4914 blas_int nrhs = blas_int(B.n_cols);
4915 blas_int lda = blas_int(A.n_rows);
4916 blas_int ldaf = blas_int(A.n_rows);
4917 blas_int ldb = blas_int(A.n_rows);
4918 blas_int ldx = blas_int(A.n_rows);
4919 blas_int info = blas_int(0);
4920 T rcond = T(0);
4921
4922 Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator());
4923
4924 podarray< T> S( A.n_rows);
4925 podarray< T> FERR( B.n_cols);
4926 podarray< T> BERR( B.n_cols);
4927 podarray<eT> WORK(2*A.n_rows);
4928 podarray< T> RWORK( A.n_rows);
4929
4930 arma_extra_debug_print("lapack::cx_posvx()");
4931 lapack::cx_posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast<eT*>(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), RWORK.memptr(), &info);
4932
4933 // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration;
4934 // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr
4935
4936 out_rcond = rcond;
4937
4938 return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0);
4939 }
4940 #else
4941 {
4942 arma_ignore(out);
4943 arma_ignore(out_rcond);
4944 arma_ignore(A);
4945 arma_ignore(B_expr);
4946 arma_ignore(equilibrate);
4947 arma_ignore(allow_ugly);
4948 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
4949 return false;
4950 }
4951 #endif
4952 }
4953
4954
4955
4956 //! solve a non-square full-rank system via QR or LQ decomposition
4957 template<typename T1>
4958 inline
4959 bool
solve_rect_fast(Mat<typename T1::elem_type> & out,Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr)4960 auxlib::solve_rect_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
4961 {
4962 arma_extra_debug_sigprint();
4963
4964 #if defined(ARMA_USE_LAPACK)
4965 {
4966 typedef typename T1::elem_type eT;
4967
4968 const unwrap<T1> U(B_expr.get_ref());
4969 const Mat<eT>& B = U.M;
4970
4971 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
4972
4973 if(A.is_empty() || B.is_empty())
4974 {
4975 out.zeros(A.n_cols, B.n_cols);
4976 return true;
4977 }
4978
4979 arma_debug_assert_blas_size(A,B);
4980
4981 Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
4982
4983 if(arma::size(tmp) == arma::size(B))
4984 {
4985 tmp = B;
4986 }
4987 else
4988 {
4989 tmp.zeros();
4990 tmp(0,0, arma::size(B)) = B;
4991 }
4992
4993 char trans = 'N';
4994 blas_int m = blas_int(A.n_rows);
4995 blas_int n = blas_int(A.n_cols);
4996 blas_int lda = blas_int(A.n_rows);
4997 blas_int ldb = blas_int(tmp.n_rows);
4998 blas_int nrhs = blas_int(B.n_cols);
4999 blas_int min_mn = (std::min)(m,n);
5000 blas_int lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs));
5001 blas_int info = 0;
5002
5003 blas_int lwork_proposed = 0;
5004
5005 if((m*n) >= 1024)
5006 {
5007 eT work_query[2];
5008 blas_int lwork_query = -1;
5009
5010 arma_extra_debug_print("lapack::gels()");
5011 lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info );
5012
5013 if(info != 0) { return false; }
5014
5015 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
5016 }
5017
5018 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
5019
5020 podarray<eT> work( static_cast<uword>(lwork_final) );
5021
5022 arma_extra_debug_print("lapack::gels()");
5023 lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info );
5024
5025 if(info != 0) { return false; }
5026
5027 if(tmp.n_rows == A.n_cols)
5028 {
5029 out.steal_mem(tmp);
5030 }
5031 else
5032 {
5033 out = tmp.head_rows(A.n_cols);
5034 }
5035
5036 return true;
5037 }
5038 #else
5039 {
5040 arma_ignore(out);
5041 arma_ignore(A);
5042 arma_ignore(B_expr);
5043 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5044 return false;
5045 }
5046 #endif
5047 }
5048
5049
5050
5051 //! solve a non-square full-rank system via QR or LQ decomposition with rcond estimate (experimental)
5052 template<typename T1>
5053 inline
5054 bool
solve_rect_rcond(Mat<typename T1::elem_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr,const bool allow_ugly)5055 auxlib::solve_rect_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const bool allow_ugly)
5056 {
5057 arma_extra_debug_sigprint();
5058
5059 #if defined(ARMA_USE_LAPACK)
5060 {
5061 typedef typename T1::elem_type eT;
5062 typedef typename T1::pod_type T;
5063
5064 out_rcond = T(0);
5065
5066 const unwrap<T1> U(B_expr.get_ref());
5067 const Mat<eT>& B = U.M;
5068
5069 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
5070
5071 if(A.is_empty() || B.is_empty())
5072 {
5073 out.zeros(A.n_cols, B.n_cols);
5074 return true;
5075 }
5076
5077 arma_debug_assert_blas_size(A,B);
5078
5079 Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
5080
5081 if(arma::size(tmp) == arma::size(B))
5082 {
5083 tmp = B;
5084 }
5085 else
5086 {
5087 tmp.zeros();
5088 tmp(0,0, arma::size(B)) = B;
5089 }
5090
5091 char trans = 'N';
5092 blas_int m = blas_int(A.n_rows);
5093 blas_int n = blas_int(A.n_cols);
5094 blas_int lda = blas_int(A.n_rows);
5095 blas_int ldb = blas_int(tmp.n_rows);
5096 blas_int nrhs = blas_int(B.n_cols);
5097 blas_int min_mn = (std::min)(m,n);
5098 blas_int lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs));
5099 blas_int info = 0;
5100
5101 blas_int lwork_proposed = 0;
5102
5103 if((m*n) >= 1024)
5104 {
5105 eT work_query[2];
5106 blas_int lwork_query = -1;
5107
5108 arma_extra_debug_print("lapack::gels()");
5109 lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info );
5110
5111 if(info != 0) { return false; }
5112
5113 lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
5114 }
5115
5116 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
5117
5118 podarray<eT> work( static_cast<uword>(lwork_final) );
5119
5120 arma_extra_debug_print("lapack::gels()");
5121 lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info );
5122
5123 if(info != 0) { return false; }
5124
5125 if(A.n_rows >= A.n_cols)
5126 {
5127 arma_extra_debug_print("estimating rcond via R");
5128
5129 // xGELS docs: for M >= N, A contains details of its QR decomposition as returned by xGEQRF
5130 // xGEQRF docs: elements on and above the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R
5131
5132 Mat<eT> R(A.n_cols, A.n_cols, arma_zeros_indicator());
5133
5134 for(uword col=0; col < A.n_cols; ++col)
5135 {
5136 for(uword row=0; row <= col; ++row)
5137 {
5138 R.at(row,col) = A.at(row,col);
5139 }
5140 }
5141
5142 // determine quality of solution
5143 out_rcond = auxlib::rcond_trimat(R, 0); // 0: upper triangular; 1: lower triangular
5144
5145 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; }
5146 }
5147 else
5148 if(A.n_rows < A.n_cols)
5149 {
5150 arma_extra_debug_print("estimating rcond via L");
5151
5152 // xGELS docs: for M < N, A contains details of its LQ decomposition as returned by xGELQF
5153 // xGELQF docs: elements on and below the diagonal contain the M-by-min(M,N) lower trapezoidal matrix L
5154
5155 Mat<eT> L(A.n_rows, A.n_rows, arma_zeros_indicator());
5156
5157 for(uword col=0; col < A.n_rows; ++col)
5158 {
5159 for(uword row=col; row < A.n_rows; ++row)
5160 {
5161 L.at(row,col) = A.at(row,col);
5162 }
5163 }
5164
5165 // determine quality of solution
5166 out_rcond = auxlib::rcond_trimat(L, 1); // 0: upper triangular; 1: lower triangular
5167
5168 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; }
5169 }
5170
5171 if(tmp.n_rows == A.n_cols)
5172 {
5173 out.steal_mem(tmp);
5174 }
5175 else
5176 {
5177 out = tmp.head_rows(A.n_cols);
5178 }
5179
5180 return true;
5181 }
5182 #else
5183 {
5184 arma_ignore(out);
5185 arma_ignore(out_rcond);
5186 arma_ignore(A);
5187 arma_ignore(B_expr);
5188 arma_ignore(allow_ugly);
5189 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5190 return false;
5191 }
5192 #endif
5193 }
5194
5195
5196
5197 template<typename T1>
5198 inline
5199 bool
solve_approx_svd(Mat<typename T1::pod_type> & out,Mat<typename T1::pod_type> & A,const Base<typename T1::pod_type,T1> & B_expr)5200 auxlib::solve_approx_svd(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr)
5201 {
5202 arma_extra_debug_sigprint();
5203
5204 #if defined(ARMA_USE_LAPACK)
5205 {
5206 typedef typename T1::pod_type eT;
5207
5208 const unwrap<T1> U(B_expr.get_ref());
5209 const Mat<eT>& B = U.M;
5210
5211 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
5212
5213 if(A.is_empty() || B.is_empty())
5214 {
5215 out.zeros(A.n_cols, B.n_cols);
5216 return true;
5217 }
5218
5219 arma_debug_assert_blas_size(A,B);
5220
5221 Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
5222
5223 if(arma::size(tmp) == arma::size(B))
5224 {
5225 tmp = B;
5226 }
5227 else
5228 {
5229 tmp.zeros();
5230 tmp(0,0, arma::size(B)) = B;
5231 }
5232
5233 blas_int m = blas_int(A.n_rows);
5234 blas_int n = blas_int(A.n_cols);
5235 blas_int min_mn = (std::min)(m, n);
5236 blas_int nrhs = blas_int(B.n_cols);
5237 blas_int lda = blas_int(A.n_rows);
5238 blas_int ldb = blas_int(tmp.n_rows);
5239 eT rcond = eT(-1); // -1 means "use machine precision"
5240 blas_int rank = blas_int(0);
5241 blas_int info = blas_int(0);
5242
5243 podarray<eT> S( static_cast<uword>(min_mn) );
5244
5245 // NOTE: with LAPACK 3.8, can use the workspace query to also obtain liwork,
5246 // NOTE: which makes the call to lapack::laenv() redundant
5247
5248 blas_int ispec = blas_int(9);
5249
5250 const char* const_name = (is_float<eT>::value) ? "SGELSD" : "DGELSD";
5251 const char* const_opts = " ";
5252
5253 char* name = const_cast<char*>(const_name);
5254 char* opts = const_cast<char*>(const_opts);
5255
5256 blas_int n1 = m;
5257 blas_int n2 = n;
5258 blas_int n3 = nrhs;
5259 blas_int n4 = lda;
5260
5261 blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0);
5262
5263 blas_int smlsiz = (std::max)( blas_int(25), laenv_result );
5264 blas_int smlsiz_p1 = blas_int(1) + smlsiz;
5265
5266 blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log(double(min_mn) / double(smlsiz_p1))/double(0.69314718055994530942) ) );
5267 blas_int liwork = (std::max)( blas_int(1), (blas_int(3)*min_mn*nlvl + blas_int(11)*min_mn) );
5268
5269 podarray<blas_int> iwork( static_cast<uword>(liwork) );
5270
5271 blas_int lwork_min = blas_int(12)*min_mn + blas_int(2)*min_mn*smlsiz + blas_int(8)*min_mn*nlvl + min_mn*nrhs + smlsiz_p1*smlsiz_p1;
5272
5273 eT work_query[2];
5274 blas_int lwork_query = blas_int(-1);
5275
5276 arma_extra_debug_print("lapack::gelsd()");
5277 lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, iwork.memptr(), &info);
5278
5279 if(info != 0) { return false; }
5280
5281 // NOTE: in LAPACK 3.8, iwork[0] returns the minimum liwork
5282
5283 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) );
5284 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
5285
5286 podarray<eT> work( static_cast<uword>(lwork_final) );
5287
5288 arma_extra_debug_print("lapack::gelsd()");
5289 lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, iwork.memptr(), &info);
5290
5291 if(info != 0) { return false; }
5292
5293 if(tmp.n_rows == A.n_cols)
5294 {
5295 out.steal_mem(tmp);
5296 }
5297 else
5298 {
5299 out = tmp.head_rows(A.n_cols);
5300 }
5301
5302 return true;
5303 }
5304 #else
5305 {
5306 arma_ignore(out);
5307 arma_ignore(A);
5308 arma_ignore(B_expr);
5309 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5310 return false;
5311 }
5312 #endif
5313 }
5314
5315
5316
5317 template<typename T1>
5318 inline
5319 bool
solve_approx_svd(Mat<std::complex<typename T1::pod_type>> & out,Mat<std::complex<typename T1::pod_type>> & A,const Base<std::complex<typename T1::pod_type>,T1> & B_expr)5320 auxlib::solve_approx_svd(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr)
5321 {
5322 arma_extra_debug_sigprint();
5323
5324 #if defined(ARMA_USE_LAPACK)
5325 {
5326 typedef typename T1::pod_type T;
5327 typedef typename std::complex<T> eT;
5328
5329 const unwrap<T1> U(B_expr.get_ref());
5330 const Mat<eT>& B = U.M;
5331
5332 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
5333
5334 if(A.is_empty() || B.is_empty())
5335 {
5336 out.zeros(A.n_cols, B.n_cols);
5337 return true;
5338 }
5339
5340 arma_debug_assert_blas_size(A,B);
5341
5342 Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() );
5343
5344 if(arma::size(tmp) == arma::size(B))
5345 {
5346 tmp = B;
5347 }
5348 else
5349 {
5350 tmp.zeros();
5351 tmp(0,0, arma::size(B)) = B;
5352 }
5353
5354 blas_int m = blas_int(A.n_rows);
5355 blas_int n = blas_int(A.n_cols);
5356 blas_int min_mn = (std::min)(m, n);
5357 blas_int nrhs = blas_int(B.n_cols);
5358 blas_int lda = blas_int(A.n_rows);
5359 blas_int ldb = blas_int(tmp.n_rows);
5360 T rcond = T(-1); // -1 means "use machine precision"
5361 blas_int rank = blas_int(0);
5362 blas_int info = blas_int(0);
5363
5364 podarray<T> S( static_cast<uword>(min_mn) );
5365
5366 blas_int ispec = blas_int(9);
5367
5368 const char* const_name = (is_float<T>::value) ? "CGELSD" : "ZGELSD";
5369 const char* const_opts = " ";
5370
5371 char* name = const_cast<char*>(const_name);
5372 char* opts = const_cast<char*>(const_opts);
5373
5374 blas_int n1 = m;
5375 blas_int n2 = n;
5376 blas_int n3 = nrhs;
5377 blas_int n4 = lda;
5378
5379 blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0);
5380
5381 blas_int smlsiz = (std::max)( blas_int(25), laenv_result );
5382 blas_int smlsiz_p1 = blas_int(1) + smlsiz;
5383
5384 blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log(double(min_mn) / double(smlsiz_p1))/double(0.69314718055994530942) ) );
5385
5386 blas_int lrwork = (m >= n)
5387 ? blas_int(10)*n + blas_int(2)*n*smlsiz + blas_int(8)*n*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs )
5388 : blas_int(10)*m + blas_int(2)*m*smlsiz + blas_int(8)*m*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs );
5389
5390 blas_int liwork = (std::max)( blas_int(1), (blas_int(3)*blas_int(min_mn)*nlvl + blas_int(11)*blas_int(min_mn)) );
5391
5392 podarray<T> rwork( static_cast<uword>(lrwork) );
5393 podarray<blas_int> iwork( static_cast<uword>(liwork) );
5394
5395 blas_int lwork_min = 2*min_mn + min_mn*nrhs;
5396
5397 eT work_query[2];
5398 blas_int lwork_query = blas_int(-1);
5399
5400 arma_extra_debug_print("lapack::cx_gelsd()");
5401 lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info);
5402
5403 if(info != 0) { return false; }
5404
5405 blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real( work_query[0]) );
5406 blas_int lwork_final = (std::max)(lwork_proposed, lwork_min);
5407
5408 podarray<eT> work( static_cast<uword>(lwork_final) );
5409
5410 arma_extra_debug_print("lapack::cx_gelsd()");
5411 lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info);
5412
5413 if(info != 0) { return false; }
5414
5415 if(tmp.n_rows == A.n_cols)
5416 {
5417 out.steal_mem(tmp);
5418 }
5419 else
5420 {
5421 out = tmp.head_rows(A.n_cols);
5422 }
5423
5424 return true;
5425 }
5426 #else
5427 {
5428 arma_ignore(out);
5429 arma_ignore(A);
5430 arma_ignore(B_expr);
5431 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5432 return false;
5433 }
5434 #endif
5435 }
5436
5437
5438
5439 template<typename T1>
5440 inline
5441 bool
solve_trimat_fast(Mat<typename T1::elem_type> & out,const Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr,const uword layout)5442 auxlib::solve_trimat_fast(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const uword layout)
5443 {
5444 arma_extra_debug_sigprint();
5445
5446 #if defined(ARMA_USE_LAPACK)
5447 {
5448 out = B_expr.get_ref();
5449
5450 const uword B_n_rows = out.n_rows;
5451 const uword B_n_cols = out.n_cols;
5452
5453 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
5454
5455 if(A.is_empty() || out.is_empty())
5456 {
5457 out.zeros(A.n_cols, B_n_cols);
5458 return true;
5459 }
5460
5461 arma_debug_assert_blas_size(A,out);
5462
5463 char uplo = (layout == 0) ? 'U' : 'L';
5464 char trans = 'N';
5465 char diag = 'N';
5466 blas_int n = blas_int(A.n_rows);
5467 blas_int nrhs = blas_int(B_n_cols);
5468 blas_int info = 0;
5469
5470 arma_extra_debug_print("lapack::trtrs()");
5471 lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
5472
5473 return (info == 0);
5474 }
5475 #else
5476 {
5477 arma_ignore(out);
5478 arma_ignore(A);
5479 arma_ignore(B_expr);
5480 arma_ignore(layout);
5481 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5482 return false;
5483 }
5484 #endif
5485 }
5486
5487
5488
5489 template<typename T1>
5490 inline
5491 bool
solve_trimat_rcond(Mat<typename T1::elem_type> & out,typename T1::pod_type & out_rcond,const Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr,const uword layout,const bool allow_ugly)5492 auxlib::solve_trimat_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const uword layout, const bool allow_ugly)
5493 {
5494 arma_extra_debug_sigprint();
5495
5496 #if defined(ARMA_USE_LAPACK)
5497 {
5498 typedef typename T1::pod_type T;
5499
5500 out_rcond = T(0);
5501
5502 out = B_expr.get_ref();
5503
5504 const uword B_n_rows = out.n_rows;
5505 const uword B_n_cols = out.n_cols;
5506
5507 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
5508
5509 if(A.is_empty() || out.is_empty())
5510 {
5511 out.zeros(A.n_cols, B_n_cols);
5512 return true;
5513 }
5514
5515 arma_debug_assert_blas_size(A,out);
5516
5517 char uplo = (layout == 0) ? 'U' : 'L';
5518 char trans = 'N';
5519 char diag = 'N';
5520 blas_int n = blas_int(A.n_rows);
5521 blas_int nrhs = blas_int(B_n_cols);
5522 blas_int info = 0;
5523
5524 arma_extra_debug_print("lapack::trtrs()");
5525 lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info);
5526
5527 if(info != 0) { return false; }
5528
5529 // determine quality of solution
5530 out_rcond = auxlib::rcond_trimat(A, layout);
5531
5532 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; }
5533
5534 return true;
5535 }
5536 #else
5537 {
5538 arma_ignore(out);
5539 arma_ignore(out_rcond);
5540 arma_ignore(A);
5541 arma_ignore(B_expr);
5542 arma_ignore(layout);
5543 arma_ignore(allow_ugly);
5544 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5545 return false;
5546 }
5547 #endif
5548 }
5549
5550
5551
5552 //! solve a system of linear equations via LU decomposition (real band matrix)
5553 template<typename T1>
5554 inline
5555 bool
solve_band_fast(Mat<typename T1::pod_type> & out,Mat<typename T1::pod_type> & A,const uword KL,const uword KU,const Base<typename T1::pod_type,T1> & B_expr)5556 auxlib::solve_band_fast(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr)
5557 {
5558 arma_extra_debug_sigprint();
5559
5560 return auxlib::solve_band_fast_common(out, A, KL, KU, B_expr);
5561 }
5562
5563
5564
5565 //! solve a system of linear equations via LU decomposition (complex band matrix)
5566 template<typename T1>
5567 inline
5568 bool
solve_band_fast(Mat<std::complex<typename T1::pod_type>> & out,Mat<std::complex<typename T1::pod_type>> & A,const uword KL,const uword KU,const Base<std::complex<typename T1::pod_type>,T1> & B_expr)5569 auxlib::solve_band_fast(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base< std::complex<typename T1::pod_type>,T1>& B_expr)
5570 {
5571 arma_extra_debug_sigprint();
5572
5573 #if defined(ARMA_CRIPPLED_LAPACK)
5574 {
5575 arma_extra_debug_print("auxlib::solve_band_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK");
5576
5577 arma_ignore(KL);
5578 arma_ignore(KU);
5579
5580 return auxlib::solve_square_fast(out, A, B_expr);
5581 }
5582 #else
5583 {
5584 return auxlib::solve_band_fast_common(out, A, KL, KU, B_expr);
5585 }
5586 #endif
5587 }
5588
5589
5590
5591 //! solve a system of linear equations via LU decomposition (band matrix)
5592 template<typename T1>
5593 inline
5594 bool
solve_band_fast_common(Mat<typename T1::elem_type> & out,const Mat<typename T1::elem_type> & A,const uword KL,const uword KU,const Base<typename T1::elem_type,T1> & B_expr)5595 auxlib::solve_band_fast_common(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const uword KL, const uword KU, const Base<typename T1::elem_type,T1>& B_expr)
5596 {
5597 arma_extra_debug_sigprint();
5598
5599 #if defined(ARMA_USE_LAPACK)
5600 {
5601 typedef typename T1::elem_type eT;
5602
5603 out = B_expr.get_ref();
5604
5605 const uword B_n_rows = out.n_rows;
5606 const uword B_n_cols = out.n_cols;
5607
5608 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
5609
5610 if(A.is_empty() || out.is_empty())
5611 {
5612 out.zeros(A.n_rows, B_n_cols);
5613 return true;
5614 }
5615
5616 // for gbsv, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1 (note: fortran counts from 1)
5617
5618 Mat<eT> AB;
5619 band_helper::compress(AB, A, KL, KU, true);
5620
5621 const uword N = AB.n_cols; // order of the original square matrix A
5622
5623 arma_debug_assert_blas_size(AB,out);
5624
5625 blas_int n = blas_int(N);
5626 blas_int kl = blas_int(KL);
5627 blas_int ku = blas_int(KU);
5628 blas_int nrhs = blas_int(B_n_cols);
5629 blas_int ldab = blas_int(AB.n_rows);
5630 blas_int ldb = blas_int(B_n_rows);
5631 blas_int info = blas_int(0);
5632
5633 podarray<blas_int> ipiv(N + 2); // +2 for paranoia
5634
5635 // NOTE: AB is overwritten
5636
5637 arma_extra_debug_print("lapack::gbsv()");
5638 lapack::gbsv<eT>(&n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info);
5639
5640 return (info == 0);
5641 }
5642 #else
5643 {
5644 arma_ignore(out);
5645 arma_ignore(A);
5646 arma_ignore(KL);
5647 arma_ignore(KU);
5648 arma_ignore(B_expr);
5649 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5650 return false;
5651 }
5652 #endif
5653 }
5654
5655
5656
5657 //! solve a system of linear equations via LU decomposition (real band matrix)
5658 template<typename T1>
5659 inline
5660 bool
solve_band_rcond(Mat<typename T1::pod_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::pod_type> & A,const uword KL,const uword KU,const Base<typename T1::pod_type,T1> & B_expr,const bool allow_ugly)5661 auxlib::solve_band_rcond(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr, const bool allow_ugly)
5662 {
5663 arma_extra_debug_sigprint();
5664
5665 return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr, allow_ugly);
5666 }
5667
5668
5669
5670 //! solve a system of linear equations via LU decomposition (complex band matrix)
5671 template<typename T1>
5672 inline
5673 bool
solve_band_rcond(Mat<std::complex<typename T1::pod_type>> & out,typename T1::pod_type & out_rcond,Mat<std::complex<typename T1::pod_type>> & A,const uword KL,const uword KU,const Base<std::complex<typename T1::pod_type>,T1> & B_expr,const bool allow_ugly)5674 auxlib::solve_band_rcond(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base< std::complex<typename T1::pod_type>,T1>& B_expr, const bool allow_ugly)
5675 {
5676 arma_extra_debug_sigprint();
5677
5678 #if defined(ARMA_CRIPPLED_LAPACK)
5679 {
5680 arma_extra_debug_print("auxlib::solve_band_rcond(): redirecting to auxlib::solve_square_rcond() due to crippled LAPACK");
5681
5682 arma_ignore(KL);
5683 arma_ignore(KU);
5684
5685 return auxlib::solve_square_rcond(out, out_rcond, A, B_expr, allow_ugly);
5686 }
5687 #else
5688 {
5689 return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr, allow_ugly);
5690 }
5691 #endif
5692 }
5693
5694
5695
5696 //! solve a system of linear equations via LU decomposition (band matrix)
5697 template<typename T1>
5698 inline
5699 bool
solve_band_rcond_common(Mat<typename T1::elem_type> & out,typename T1::pod_type & out_rcond,const Mat<typename T1::elem_type> & A,const uword KL,const uword KU,const Base<typename T1::elem_type,T1> & B_expr,const bool allow_ugly)5700 auxlib::solve_band_rcond_common(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, const Mat<typename T1::elem_type>& A, const uword KL, const uword KU, const Base<typename T1::elem_type,T1>& B_expr, const bool allow_ugly)
5701 {
5702 arma_extra_debug_sigprint();
5703
5704 #if defined(ARMA_USE_LAPACK)
5705 {
5706 typedef typename T1::elem_type eT;
5707 typedef typename T1::pod_type T;
5708
5709 out_rcond = T(0);
5710
5711 out = B_expr.get_ref();
5712
5713 const uword B_n_rows = out.n_rows;
5714 const uword B_n_cols = out.n_cols;
5715
5716 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
5717
5718 if(A.is_empty() || out.is_empty())
5719 {
5720 out.zeros(A.n_rows, B_n_cols);
5721 return true;
5722 }
5723
5724 // for gbtrf, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1 (note: fortran counts from 1)
5725
5726 Mat<eT> AB;
5727 band_helper::compress(AB, A, KL, KU, true);
5728
5729 const uword N = AB.n_cols; // order of the original square matrix A
5730
5731 arma_debug_assert_blas_size(AB,out);
5732
5733 char norm_id = '1';
5734 char trans = 'N';
5735 blas_int n = blas_int(N); // assuming square matrix
5736 blas_int kl = blas_int(KL);
5737 blas_int ku = blas_int(KU);
5738 blas_int nrhs = blas_int(B_n_cols);
5739 blas_int ldab = blas_int(AB.n_rows);
5740 blas_int ldb = blas_int(B_n_rows);
5741 blas_int info = blas_int(0);
5742 T norm_val = T(0);
5743
5744 podarray<T> junk(1);
5745 podarray<blas_int> ipiv(N + 2); // +2 for paranoia
5746
5747 arma_extra_debug_print("lapack::langb()");
5748 norm_val = lapack::langb<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, junk.memptr());
5749
5750 arma_extra_debug_print("lapack::gbtrf()");
5751 lapack::gbtrf<eT>(&n, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &info);
5752
5753 if(info != 0) { return false; }
5754
5755 arma_extra_debug_print("lapack::gbtrs()");
5756 lapack::gbtrs<eT>(&trans, &n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info);
5757
5758 if(info != 0) { return false; }
5759
5760 out_rcond = auxlib::lu_rcond_band<T>(AB, KL, KU, ipiv, norm_val);
5761
5762 if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(AB)) ) { return false; }
5763
5764 return true;
5765 }
5766 #else
5767 {
5768 arma_ignore(out);
5769 arma_ignore(out_rcond);
5770 arma_ignore(A);
5771 arma_ignore(KL);
5772 arma_ignore(KU);
5773 arma_ignore(B_expr);
5774 arma_ignore(allow_ugly);
5775 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5776 return false;
5777 }
5778 #endif
5779 }
5780
5781
5782
5783 //! solve a system of linear equations via LU decomposition with refinement (real band matrices)
5784 template<typename T1>
5785 inline
5786 bool
solve_band_refine(Mat<typename T1::pod_type> & out,typename T1::pod_type & out_rcond,Mat<typename T1::pod_type> & A,const uword KL,const uword KU,const Base<typename T1::pod_type,T1> & B_expr,const bool equilibrate,const bool allow_ugly)5787 auxlib::solve_band_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate, const bool allow_ugly)
5788 {
5789 arma_extra_debug_sigprint();
5790
5791 #if defined(ARMA_USE_LAPACK)
5792 {
5793 typedef typename T1::pod_type eT;
5794
5795 Mat<eT> B = B_expr.get_ref(); // B is overwritten
5796
5797 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
5798
5799 if(A.is_empty() || B.is_empty())
5800 {
5801 out.zeros(A.n_rows, B.n_cols);
5802 return true;
5803 }
5804
5805 // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1 (note: fortran counts from 1)
5806
5807 Mat<eT> AB;
5808 band_helper::compress(AB, A, KL, KU, false);
5809
5810 const uword N = AB.n_cols;
5811
5812 arma_debug_assert_blas_size(AB,B);
5813
5814 out.set_size(N, B.n_cols);
5815
5816 Mat<eT> AFB(2*KL+KU+1, N, arma_nozeros_indicator());
5817
5818 char fact = (equilibrate) ? 'E' : 'N';
5819 char trans = 'N';
5820 char equed = char(0);
5821 blas_int n = blas_int(N);
5822 blas_int kl = blas_int(KL);
5823 blas_int ku = blas_int(KU);
5824 blas_int nrhs = blas_int(B.n_cols);
5825 blas_int ldab = blas_int(AB.n_rows);
5826 blas_int ldafb = blas_int(AFB.n_rows);
5827 blas_int ldb = blas_int(B.n_rows);
5828 blas_int ldx = blas_int(N);
5829 blas_int info = blas_int(0);
5830 eT rcond = eT(0);
5831
5832 podarray<blas_int> IPIV( N);
5833 podarray<eT> R( N);
5834 podarray<eT> C( N);
5835 podarray<eT> FERR( B.n_cols);
5836 podarray<eT> BERR( B.n_cols);
5837 podarray<eT> WORK(3*N);
5838 podarray<blas_int> IWORK( N);
5839
5840 arma_extra_debug_print("lapack::gbsvx()");
5841 lapack::gbsvx
5842 (
5843 &fact, &trans, &n, &kl, &ku, &nrhs,
5844 AB.memptr(), &ldab,
5845 AFB.memptr(), &ldafb,
5846 IPIV.memptr(),
5847 &equed,
5848 R.memptr(),
5849 C.memptr(),
5850 B.memptr(), &ldb,
5851 out.memptr(), &ldx,
5852 &rcond,
5853 FERR.memptr(),
5854 BERR.memptr(),
5855 WORK.memptr(),
5856 IWORK.memptr(),
5857 &info
5858 );
5859
5860 out_rcond = rcond;
5861
5862 return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0);
5863 }
5864 #else
5865 {
5866 arma_ignore(out);
5867 arma_ignore(out_rcond);
5868 arma_ignore(A);
5869 arma_ignore(KL);
5870 arma_ignore(KU);
5871 arma_ignore(B_expr);
5872 arma_ignore(equilibrate);
5873 arma_ignore(allow_ugly);
5874 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5875 return false;
5876 }
5877 #endif
5878 }
5879
5880
5881
5882 //! solve a system of linear equations via LU decomposition with refinement (complex band matrices)
5883 template<typename T1>
5884 inline
5885 bool
solve_band_refine(Mat<std::complex<typename T1::pod_type>> & out,typename T1::pod_type & out_rcond,Mat<std::complex<typename T1::pod_type>> & A,const uword KL,const uword KU,const Base<std::complex<typename T1::pod_type>,T1> & B_expr,const bool equilibrate,const bool allow_ugly)5886 auxlib::solve_band_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate, const bool allow_ugly)
5887 {
5888 arma_extra_debug_sigprint();
5889
5890 #if defined(ARMA_CRIPPLED_LAPACK)
5891 {
5892 arma_extra_debug_print("auxlib::solve_band_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK");
5893
5894 arma_ignore(KL);
5895 arma_ignore(KU);
5896
5897 return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate, allow_ugly);
5898 }
5899 #elif defined(ARMA_USE_LAPACK)
5900 {
5901 typedef typename T1::pod_type T;
5902 typedef typename std::complex<T> eT;
5903
5904 Mat<eT> B = B_expr.get_ref(); // B is overwritten
5905
5906 arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" );
5907
5908 if(A.is_empty() || B.is_empty())
5909 {
5910 out.zeros(A.n_rows, B.n_cols);
5911 return true;
5912 }
5913
5914 // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1 (note: fortran counts from 1)
5915
5916 Mat<eT> AB;
5917 band_helper::compress(AB, A, KL, KU, false);
5918
5919 const uword N = AB.n_cols;
5920
5921 arma_debug_assert_blas_size(AB,B);
5922
5923 out.set_size(N, B.n_cols);
5924
5925 Mat<eT> AFB(2*KL+KU+1, N, arma_nozeros_indicator());
5926
5927 char fact = (equilibrate) ? 'E' : 'N';
5928 char trans = 'N';
5929 char equed = char(0);
5930 blas_int n = blas_int(N);
5931 blas_int kl = blas_int(KL);
5932 blas_int ku = blas_int(KU);
5933 blas_int nrhs = blas_int(B.n_cols);
5934 blas_int ldab = blas_int(AB.n_rows);
5935 blas_int ldafb = blas_int(AFB.n_rows);
5936 blas_int ldb = blas_int(B.n_rows);
5937 blas_int ldx = blas_int(N);
5938 blas_int info = blas_int(0);
5939 T rcond = T(0);
5940
5941 podarray<blas_int> IPIV( N);
5942 podarray< T> R( N);
5943 podarray< T> C( N);
5944 podarray< T> FERR( B.n_cols);
5945 podarray< T> BERR( B.n_cols);
5946 podarray<eT> WORK(2*N);
5947 podarray< T> RWORK( N); // NOTE: according to lapack 3.6.1 docs, the size of RWORK in zgbsvx is different to RWORK in dgesvx
5948
5949 arma_extra_debug_print("lapack::cx_gbsvx()");
5950 lapack::cx_gbsvx
5951 (
5952 &fact, &trans, &n, &kl, &ku, &nrhs,
5953 AB.memptr(), &ldab,
5954 AFB.memptr(), &ldafb,
5955 IPIV.memptr(),
5956 &equed,
5957 R.memptr(),
5958 C.memptr(),
5959 B.memptr(), &ldb,
5960 out.memptr(), &ldx,
5961 &rcond,
5962 FERR.memptr(),
5963 BERR.memptr(),
5964 WORK.memptr(),
5965 RWORK.memptr(),
5966 &info
5967 );
5968
5969 out_rcond = rcond;
5970
5971 return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0);
5972 }
5973 #else
5974 {
5975 arma_ignore(out);
5976 arma_ignore(out_rcond);
5977 arma_ignore(A);
5978 arma_ignore(KL);
5979 arma_ignore(KU);
5980 arma_ignore(B_expr);
5981 arma_ignore(equilibrate);
5982 arma_ignore(allow_ugly);
5983 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
5984 return false;
5985 }
5986 #endif
5987 }
5988
5989
5990
5991 //! solve a system of linear equations via Gaussian elimination with partial pivoting (real tridiagonal band matrix)
5992 template<typename T1>
5993 inline
5994 bool
solve_tridiag_fast(Mat<typename T1::pod_type> & out,Mat<typename T1::pod_type> & A,const Base<typename T1::pod_type,T1> & B_expr)5995 auxlib::solve_tridiag_fast(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr)
5996 {
5997 arma_extra_debug_sigprint();
5998
5999 return auxlib::solve_tridiag_fast_common(out, A, B_expr);
6000 }
6001
6002
6003
6004 //! solve a system of linear equations via Gaussian elimination with partial pivoting (complex tridiagonal band matrix)
6005 template<typename T1>
6006 inline
6007 bool
solve_tridiag_fast(Mat<std::complex<typename T1::pod_type>> & out,Mat<std::complex<typename T1::pod_type>> & A,const Base<std::complex<typename T1::pod_type>,T1> & B_expr)6008 auxlib::solve_tridiag_fast(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>,T1>& B_expr)
6009 {
6010 arma_extra_debug_sigprint();
6011
6012 #if defined(ARMA_CRIPPLED_LAPACK)
6013 {
6014 arma_extra_debug_print("auxlib::solve_tridiag_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK");
6015
6016 return auxlib::solve_square_fast(out, A, B_expr);
6017 }
6018 #else
6019 {
6020 return auxlib::solve_tridiag_fast_common(out, A, B_expr);
6021 }
6022 #endif
6023 }
6024
6025
6026
6027 //! solve a system of linear equations via Gaussian elimination with partial pivoting (tridiagonal band matrix)
6028 template<typename T1>
6029 inline
6030 bool
solve_tridiag_fast_common(Mat<typename T1::elem_type> & out,const Mat<typename T1::elem_type> & A,const Base<typename T1::elem_type,T1> & B_expr)6031 auxlib::solve_tridiag_fast_common(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr)
6032 {
6033 arma_extra_debug_sigprint();
6034
6035 #if defined(ARMA_USE_LAPACK)
6036 {
6037 typedef typename T1::elem_type eT;
6038
6039 out = B_expr.get_ref();
6040
6041 const uword B_n_rows = out.n_rows;
6042 const uword B_n_cols = out.n_cols;
6043
6044 arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" );
6045
6046 if(A.is_empty() || out.is_empty())
6047 {
6048 out.zeros(A.n_rows, B_n_cols);
6049 return true;
6050 }
6051
6052 Mat<eT> tridiag;
6053 band_helper::extract_tridiag(tridiag, A);
6054
6055 arma_debug_assert_blas_size(tridiag, out);
6056
6057 blas_int n = blas_int(A.n_rows);
6058 blas_int nrhs = blas_int(B_n_cols);
6059 blas_int ldb = blas_int(B_n_rows);
6060 blas_int info = blas_int(0);
6061
6062 arma_extra_debug_print("lapack::gtsv()");
6063 lapack::gtsv<eT>(&n, &nrhs, tridiag.colptr(0), tridiag.colptr(1), tridiag.colptr(2), out.memptr(), &ldb, &info);
6064
6065 return (info == 0);
6066 }
6067 #else
6068 {
6069 arma_ignore(out);
6070 arma_ignore(A);
6071 arma_ignore(B_expr);
6072 arma_stop_logic_error("solve(): use of LAPACK must be enabled");
6073 return false;
6074 }
6075 #endif
6076 }
6077
6078
6079
6080 //
6081 // Schur decomposition
6082
6083 template<typename eT, typename T1>
6084 inline
6085 bool
schur(Mat<eT> & U,Mat<eT> & S,const Base<eT,T1> & X,const bool calc_U)6086 auxlib::schur(Mat<eT>& U, Mat<eT>& S, const Base<eT,T1>& X, const bool calc_U)
6087 {
6088 arma_extra_debug_sigprint();
6089
6090 #if defined(ARMA_USE_LAPACK)
6091 {
6092 S = X.get_ref();
6093
6094 arma_debug_check( (S.is_square() == false), "schur(): given matrix must be square sized" );
6095
6096 if(S.is_empty())
6097 {
6098 U.reset();
6099 S.reset();
6100 return true;
6101 }
6102
6103 arma_debug_assert_blas_size(S);
6104
6105 const uword S_n_rows = S.n_rows;
6106
6107 if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); }
6108
6109 char jobvs = calc_U ? 'V' : 'N';
6110 char sort = 'N';
6111 void* select = 0;
6112 blas_int n = blas_int(S_n_rows);
6113 blas_int sdim = 0;
6114 blas_int ldvs = calc_U ? n : blas_int(1);
6115 blas_int lwork = 64*n; // lwork_min = (std::max)(blas_int(1), 3*n)
6116 blas_int info = 0;
6117
6118 podarray<eT> wr(S_n_rows);
6119 podarray<eT> wi(S_n_rows);
6120
6121 podarray<eT> work( static_cast<uword>(lwork) );
6122 podarray<blas_int> bwork(S_n_rows);
6123
6124 arma_extra_debug_print("lapack::gees()");
6125 lapack::gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, wr.memptr(), wi.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, bwork.memptr(), &info);
6126
6127 return (info == 0);
6128 }
6129 #else
6130 {
6131 arma_ignore(U);
6132 arma_ignore(S);
6133 arma_ignore(X);
6134 arma_ignore(calc_U);
6135 arma_stop_logic_error("schur(): use of LAPACK must be enabled");
6136 return false;
6137 }
6138 #endif
6139 }
6140
6141
6142
6143 template<typename T, typename T1>
6144 inline
6145 bool
schur(Mat<std::complex<T>> & U,Mat<std::complex<T>> & S,const Base<std::complex<T>,T1> & X,const bool calc_U)6146 auxlib::schur(Mat< std::complex<T> >& U, Mat< std::complex<T> >& S, const Base<std::complex<T>,T1>& X, const bool calc_U)
6147 {
6148 arma_extra_debug_sigprint();
6149
6150 S = X.get_ref();
6151
6152 arma_debug_check( (S.is_square() == false), "schur(): given matrix must be square sized" );
6153
6154 return auxlib::schur(U,S,calc_U);
6155 }
6156
6157
6158
6159 template<typename T>
6160 inline
6161 bool
schur(Mat<std::complex<T>> & U,Mat<std::complex<T>> & S,const bool calc_U)6162 auxlib::schur(Mat< std::complex<T> >& U, Mat< std::complex<T> >& S, const bool calc_U)
6163 {
6164 arma_extra_debug_sigprint();
6165
6166 #if defined(ARMA_USE_LAPACK)
6167 {
6168 typedef std::complex<T> eT;
6169
6170 if(S.is_empty())
6171 {
6172 U.reset();
6173 S.reset();
6174 return true;
6175 }
6176
6177 arma_debug_assert_blas_size(S);
6178
6179 const uword S_n_rows = S.n_rows;
6180
6181 if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); }
6182
6183 char jobvs = calc_U ? 'V' : 'N';
6184 char sort = 'N';
6185 void* select = 0;
6186 blas_int n = blas_int(S_n_rows);
6187 blas_int sdim = 0;
6188 blas_int ldvs = calc_U ? n : blas_int(1);
6189 blas_int lwork = 64*n; // lwork_min = (std::max)(blas_int(1), 2*n)
6190 blas_int info = 0;
6191
6192 podarray<eT> w(S_n_rows);
6193 podarray<eT> work( static_cast<uword>(lwork) );
6194 podarray< T> rwork(S_n_rows);
6195 podarray<blas_int> bwork(S_n_rows);
6196
6197 arma_extra_debug_print("lapack::cx_gees()");
6198 lapack::cx_gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, w.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), &info);
6199
6200 return (info == 0);
6201 }
6202 #else
6203 {
6204 arma_ignore(U);
6205 arma_ignore(S);
6206 arma_ignore(calc_U);
6207 arma_stop_logic_error("schur(): use of LAPACK must be enabled");
6208 return false;
6209 }
6210 #endif
6211 }
6212
6213
6214
6215 //
6216 // solve the Sylvester equation AX + XB = C
6217
6218 template<typename eT>
6219 inline
6220 bool
syl(Mat<eT> & X,const Mat<eT> & A,const Mat<eT> & B,const Mat<eT> & C)6221 auxlib::syl(Mat<eT>& X, const Mat<eT>& A, const Mat<eT>& B, const Mat<eT>& C)
6222 {
6223 arma_extra_debug_sigprint();
6224
6225 #if defined(ARMA_USE_LAPACK)
6226 {
6227 arma_debug_check( (A.is_square() == false) || (B.is_square() == false), "syl(): given matrices must be square sized" );
6228
6229 arma_debug_check( (C.n_rows != A.n_rows) || (C.n_cols != B.n_cols), "syl(): matrices are not conformant" );
6230
6231 if(A.is_empty() || B.is_empty() || C.is_empty()) { X.reset(); return true; }
6232
6233 Mat<eT> Z1, Z2, T1, T2;
6234
6235 const bool status_sd1 = auxlib::schur(Z1, T1, A);
6236 const bool status_sd2 = auxlib::schur(Z2, T2, B);
6237
6238 if( (status_sd1 == false) || (status_sd2 == false) ) { return false; }
6239
6240 char trana = 'N';
6241 char tranb = 'N';
6242 blas_int isgn = +1;
6243 blas_int m = blas_int(T1.n_rows);
6244 blas_int n = blas_int(T2.n_cols);
6245
6246 eT scale = eT(0);
6247 blas_int info = 0;
6248
6249 Mat<eT> Y = trans(Z1) * C * Z2;
6250
6251 arma_extra_debug_print("lapack::trsyl()");
6252 lapack::trsyl<eT>(&trana, &tranb, &isgn, &m, &n, T1.memptr(), &m, T2.memptr(), &n, Y.memptr(), &m, &scale, &info);
6253
6254 if(info < 0) { return false; }
6255
6256 //Y /= scale;
6257 Y /= (-scale);
6258
6259 X = Z1 * Y * trans(Z2);
6260
6261 return true;
6262 }
6263 #else
6264 {
6265 arma_ignore(X);
6266 arma_ignore(A);
6267 arma_ignore(B);
6268 arma_ignore(C);
6269 arma_stop_logic_error("syl(): use of LAPACK must be enabled");
6270 return false;
6271 }
6272 #endif
6273 }
6274
6275
6276
6277 //
6278 // QZ decomposition of general square real matrix pair
6279
6280 template<typename T, typename T1, typename T2>
6281 inline
6282 bool
qz(Mat<T> & A,Mat<T> & B,Mat<T> & vsl,Mat<T> & vsr,const Base<T,T1> & X_expr,const Base<T,T2> & Y_expr,const char mode)6283 auxlib::qz(Mat<T>& A, Mat<T>& B, Mat<T>& vsl, Mat<T>& vsr, const Base<T,T1>& X_expr, const Base<T,T2>& Y_expr, const char mode)
6284 {
6285 arma_extra_debug_sigprint();
6286
6287 #if defined(ARMA_USE_LAPACK)
6288 {
6289 A = X_expr.get_ref();
6290 B = Y_expr.get_ref();
6291
6292 arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized" );
6293
6294 arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" );
6295
6296 if(A.is_empty())
6297 {
6298 A.reset();
6299 B.reset();
6300 vsl.reset();
6301 vsr.reset();
6302 return true;
6303 }
6304
6305 arma_debug_assert_blas_size(A);
6306
6307 vsl.set_size(A.n_rows, A.n_rows);
6308 vsr.set_size(A.n_rows, A.n_rows);
6309
6310 char jobvsl = 'V';
6311 char jobvsr = 'V';
6312 char eigsort = 'N';
6313 void* selctg = 0;
6314 blas_int N = blas_int(A.n_rows);
6315 blas_int sdim = 0;
6316 blas_int lwork = 64*N+16; // lwork_min = (std::max)(blas_int(1),8*N+16)
6317 blas_int info = 0;
6318
6319 if(mode == 'l') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_lhp<T>)); }
6320 else if(mode == 'r') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_rhp<T>)); }
6321 else if(mode == 'i') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_iuc<T>)); }
6322 else if(mode == 'o') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_ouc<T>)); }
6323
6324 podarray<T> alphar(A.n_rows);
6325 podarray<T> alphai(A.n_rows);
6326 podarray<T> beta(A.n_rows);
6327
6328 podarray<T> work( static_cast<uword>(lwork) );
6329 podarray<blas_int> bwork( static_cast<uword>(N) );
6330
6331 arma_extra_debug_print("lapack::gges()");
6332
6333 lapack::gges
6334 (
6335 &jobvsl, &jobvsr, &eigsort, selctg, &N,
6336 A.memptr(), &N, B.memptr(), &N, &sdim,
6337 alphar.memptr(), alphai.memptr(), beta.memptr(),
6338 vsl.memptr(), &N, vsr.memptr(), &N,
6339 work.memptr(), &lwork, bwork.memptr(),
6340 &info
6341 );
6342
6343 if(info != 0) { return false; }
6344
6345 op_strans::apply_mat_inplace(vsl);
6346
6347 return true;
6348 }
6349 #else
6350 {
6351 arma_ignore(A);
6352 arma_ignore(B);
6353 arma_ignore(vsl);
6354 arma_ignore(vsr);
6355 arma_ignore(X_expr);
6356 arma_ignore(Y_expr);
6357 arma_ignore(mode);
6358 arma_stop_logic_error("qz(): use of LAPACK must be enabled");
6359 return false;
6360 }
6361 #endif
6362 }
6363
6364
6365
6366 //
6367 // QZ decomposition of general square complex matrix pair
6368
6369 template<typename T, typename T1, typename T2>
6370 inline
6371 bool
qz(Mat<std::complex<T>> & A,Mat<std::complex<T>> & B,Mat<std::complex<T>> & vsl,Mat<std::complex<T>> & vsr,const Base<std::complex<T>,T1> & X_expr,const Base<std::complex<T>,T2> & Y_expr,const char mode)6372 auxlib::qz(Mat< std::complex<T> >& A, Mat< std::complex<T> >& B, Mat< std::complex<T> >& vsl, Mat< std::complex<T> >& vsr, const Base< std::complex<T>, T1 >& X_expr, const Base< std::complex<T>, T2 >& Y_expr, const char mode)
6373 {
6374 arma_extra_debug_sigprint();
6375
6376 #if defined(ARMA_USE_LAPACK)
6377 {
6378 typedef typename std::complex<T> eT;
6379
6380 A = X_expr.get_ref();
6381 B = Y_expr.get_ref();
6382
6383 arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized" );
6384
6385 arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" );
6386
6387 if(A.is_empty())
6388 {
6389 A.reset();
6390 B.reset();
6391 vsl.reset();
6392 vsr.reset();
6393 return true;
6394 }
6395
6396 arma_debug_assert_blas_size(A);
6397
6398 vsl.set_size(A.n_rows, A.n_rows);
6399 vsr.set_size(A.n_rows, A.n_rows);
6400
6401 char jobvsl = 'V';
6402 char jobvsr = 'V';
6403 char eigsort = 'N';
6404 void* selctg = 0;
6405 blas_int N = blas_int(A.n_rows);
6406 blas_int sdim = 0;
6407 blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N)
6408 blas_int info = 0;
6409
6410 if(mode == 'l') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_lhp<T>)); }
6411 else if(mode == 'r') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_rhp<T>)); }
6412 else if(mode == 'i') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_iuc<T>)); }
6413 else if(mode == 'o') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_ouc<T>)); }
6414
6415 podarray<eT> alpha(A.n_rows);
6416 podarray<eT> beta(A.n_rows);
6417
6418 podarray<eT> work( static_cast<uword>(lwork) );
6419 podarray< T> rwork( static_cast<uword>(8*N) );
6420 podarray<blas_int> bwork( static_cast<uword>(N) );
6421
6422 arma_extra_debug_print("lapack::cx_gges()");
6423
6424 lapack::cx_gges
6425 (
6426 &jobvsl, &jobvsr, &eigsort, selctg, &N,
6427 A.memptr(), &N, B.memptr(), &N, &sdim,
6428 alpha.memptr(), beta.memptr(),
6429 vsl.memptr(), &N, vsr.memptr(), &N,
6430 work.memptr(), &lwork, rwork.memptr(), bwork.memptr(),
6431 &info
6432 );
6433
6434 if(info != 0) { return false; }
6435
6436 op_htrans::apply_mat_inplace(vsl);
6437
6438 return true;
6439 }
6440 #else
6441 {
6442 arma_ignore(A);
6443 arma_ignore(B);
6444 arma_ignore(vsl);
6445 arma_ignore(vsr);
6446 arma_ignore(X_expr);
6447 arma_ignore(Y_expr);
6448 arma_ignore(mode);
6449 arma_stop_logic_error("qz(): use of LAPACK must be enabled");
6450 return false;
6451 }
6452 #endif
6453 }
6454
6455
6456
6457 template<typename eT>
6458 inline
6459 eT
rcond(Mat<eT> & A)6460 auxlib::rcond(Mat<eT>& A)
6461 {
6462 #if defined(ARMA_USE_LAPACK)
6463 {
6464 arma_debug_assert_blas_size(A);
6465
6466 char norm_id = '1';
6467 blas_int m = blas_int(A.n_rows);
6468 blas_int n = blas_int(A.n_rows); // assuming square matrix
6469 blas_int lda = blas_int(A.n_rows);
6470 eT norm_val = eT(0);
6471 eT rcond = eT(0);
6472 blas_int info = blas_int(0);
6473
6474 podarray<eT> work(4*A.n_rows);
6475 podarray<blas_int> iwork( A.n_rows);
6476 podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) );
6477
6478 arma_extra_debug_print("lapack::lange()");
6479 norm_val = lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, work.memptr());
6480
6481 arma_extra_debug_print("lapack::getrf()");
6482 lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info);
6483
6484 if(info != blas_int(0)) { return eT(0); }
6485
6486 arma_extra_debug_print("lapack::gecon()");
6487 lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
6488
6489 if(info != blas_int(0)) { return eT(0); }
6490
6491 return rcond;
6492 }
6493 #else
6494 {
6495 arma_ignore(A);
6496 arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
6497 return eT(0);
6498 }
6499 #endif
6500 }
6501
6502
6503
6504 template<typename T>
6505 inline
6506 T
rcond(Mat<std::complex<T>> & A)6507 auxlib::rcond(Mat< std::complex<T> >& A)
6508 {
6509 #if defined(ARMA_USE_LAPACK)
6510 {
6511 typedef typename std::complex<T> eT;
6512
6513 arma_debug_assert_blas_size(A);
6514
6515 char norm_id = '1';
6516 blas_int m = blas_int(A.n_rows);
6517 blas_int n = blas_int(A.n_rows); // assuming square matrix
6518 blas_int lda = blas_int(A.n_rows);
6519 T norm_val = T(0);
6520 T rcond = T(0);
6521 blas_int info = blas_int(0);
6522
6523 podarray< T> junk(1);
6524 podarray<eT> work(2*A.n_rows);
6525 podarray< T> rwork(2*A.n_rows);
6526 podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) );
6527
6528 arma_extra_debug_print("lapack::lange()");
6529 norm_val = lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, junk.memptr());
6530
6531 arma_extra_debug_print("lapack::getrf()");
6532 lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info);
6533
6534 if(info != blas_int(0)) { return T(0); }
6535
6536 arma_extra_debug_print("lapack::cx_gecon()");
6537 lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
6538
6539 if(info != blas_int(0)) { return T(0); }
6540
6541 return rcond;
6542 }
6543 #else
6544 {
6545 arma_ignore(A);
6546 arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
6547 return T(0);
6548 }
6549 #endif
6550 }
6551
6552
6553
6554 template<typename eT>
6555 inline
6556 eT
rcond_sympd(Mat<eT> & A,bool & calc_ok)6557 auxlib::rcond_sympd(Mat<eT>& A, bool& calc_ok)
6558 {
6559 #if defined(ARMA_USE_LAPACK)
6560 {
6561 arma_debug_assert_blas_size(A);
6562
6563 calc_ok = false;
6564
6565 char norm_id = '1';
6566 char uplo = 'L';
6567 blas_int n = blas_int(A.n_rows); // assuming square matrix
6568 blas_int lda = blas_int(A.n_rows);
6569 eT norm_val = eT(0);
6570 eT rcond = eT(0);
6571 blas_int info = blas_int(0);
6572
6573 podarray<eT> work(3*A.n_rows);
6574 podarray<blas_int> iwork( A.n_rows);
6575
6576 arma_extra_debug_print("lapack::lansy()");
6577 norm_val = lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr());
6578
6579 arma_extra_debug_print("lapack::potrf()");
6580 lapack::potrf(&uplo, &n, A.memptr(), &lda, &info);
6581
6582 if(info != blas_int(0)) { return eT(0); }
6583
6584 arma_extra_debug_print("lapack::pocon()");
6585 lapack::pocon(&uplo, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
6586
6587 if(info != blas_int(0)) { return eT(0); }
6588
6589 calc_ok = true;
6590
6591 return rcond;
6592 }
6593 #else
6594 {
6595 arma_ignore(A);
6596 calc_ok = false;
6597 arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
6598 return eT(0);
6599 }
6600 #endif
6601 }
6602
6603
6604
6605 template<typename T>
6606 inline
6607 T
rcond_sympd(Mat<std::complex<T>> & A,bool & calc_ok)6608 auxlib::rcond_sympd(Mat< std::complex<T> >& A, bool& calc_ok)
6609 {
6610 #if defined(ARMA_CRIPPLED_LAPACK)
6611 {
6612 arma_extra_debug_print("auxlib::rcond_sympd(): redirecting to auxlib::rcond() due to crippled LAPACK");
6613
6614 calc_ok = true;
6615
6616 return auxlib::rcond(A);
6617 }
6618 #elif defined(ARMA_USE_LAPACK)
6619 {
6620 typedef typename std::complex<T> eT;
6621
6622 arma_debug_assert_blas_size(A);
6623
6624 calc_ok = false;
6625
6626 char norm_id = '1';
6627 char uplo = 'L';
6628 blas_int n = blas_int(A.n_rows); // assuming square matrix
6629 blas_int lda = blas_int(A.n_rows);
6630 T norm_val = T(0);
6631 T rcond = T(0);
6632 blas_int info = blas_int(0);
6633
6634 podarray<eT> work(2*A.n_rows);
6635 podarray< T> rwork( A.n_rows);
6636
6637 arma_extra_debug_print("lapack::lanhe()");
6638 norm_val = lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, rwork.memptr());
6639
6640 arma_extra_debug_print("lapack::potrf()");
6641 lapack::potrf(&uplo, &n, A.memptr(), &lda, &info);
6642
6643 if(info != blas_int(0)) { return T(0); }
6644
6645 arma_extra_debug_print("lapack::cx_pocon()");
6646 lapack::cx_pocon(&uplo, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
6647
6648 if(info != blas_int(0)) { return T(0); }
6649
6650 calc_ok = true;
6651
6652 return rcond;
6653 }
6654 #else
6655 {
6656 arma_ignore(A);
6657 calc_ok = false;
6658 arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
6659 return T(0);
6660 }
6661 #endif
6662 }
6663
6664
6665
6666 template<typename eT>
6667 inline
6668 eT
rcond_trimat(const Mat<eT> & A,const uword layout)6669 auxlib::rcond_trimat(const Mat<eT>& A, const uword layout)
6670 {
6671 #if defined(ARMA_USE_LAPACK)
6672 {
6673 arma_debug_assert_blas_size(A);
6674
6675 char norm_id = '1';
6676 char uplo = (layout == 0) ? 'U' : 'L';
6677 char diag = 'N';
6678 blas_int n = blas_int(A.n_rows); // assuming square matrix
6679 eT rcond = eT(0);
6680 blas_int info = blas_int(0);
6681
6682 podarray<eT> work(3*A.n_rows);
6683 podarray<blas_int> iwork( A.n_rows);
6684
6685 arma_extra_debug_print("lapack::trcon()");
6686 lapack::trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), iwork.memptr(), &info);
6687
6688 if(info != blas_int(0)) { return eT(0); }
6689
6690 return rcond;
6691 }
6692 #else
6693 {
6694 arma_ignore(A);
6695 arma_ignore(layout);
6696 arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
6697 return eT(0);
6698 }
6699 #endif
6700 }
6701
6702
6703
6704 template<typename T>
6705 inline
6706 T
rcond_trimat(const Mat<std::complex<T>> & A,const uword layout)6707 auxlib::rcond_trimat(const Mat< std::complex<T> >& A, const uword layout)
6708 {
6709 #if defined(ARMA_USE_LAPACK)
6710 {
6711 typedef typename std::complex<T> eT;
6712
6713 arma_debug_assert_blas_size(A);
6714
6715 char norm_id = '1';
6716 char uplo = (layout == 0) ? 'U' : 'L';
6717 char diag = 'N';
6718 blas_int n = blas_int(A.n_rows); // assuming square matrix
6719 T rcond = T(0);
6720 blas_int info = blas_int(0);
6721
6722 podarray<eT> work(2*A.n_rows);
6723 podarray< T> rwork( A.n_rows);
6724
6725 arma_extra_debug_print("lapack::cx_trcon()");
6726 lapack::cx_trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), rwork.memptr(), &info);
6727
6728 if(info != blas_int(0)) { return T(0); }
6729
6730 return rcond;
6731 }
6732 #else
6733 {
6734 arma_ignore(A);
6735 arma_ignore(layout);
6736 arma_stop_logic_error("rcond(): use of LAPACK must be enabled");
6737 return T(0);
6738 }
6739 #endif
6740 }
6741
6742
6743
6744 template<typename eT>
6745 inline
6746 eT
lu_rcond(const Mat<eT> & A,const eT norm_val)6747 auxlib::lu_rcond(const Mat<eT>& A, const eT norm_val)
6748 {
6749 #if defined(ARMA_USE_LAPACK)
6750 {
6751 char norm_id = '1';
6752 blas_int n = blas_int(A.n_rows); // assuming square matrix
6753 blas_int lda = blas_int(A.n_rows);
6754 eT rcond = eT(0);
6755 blas_int info = blas_int(0);
6756
6757 podarray<eT> work(4*A.n_rows);
6758 podarray<blas_int> iwork( A.n_rows);
6759
6760 arma_extra_debug_print("lapack::gecon()");
6761 lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
6762
6763 if(info != blas_int(0)) { return eT(0); }
6764
6765 return rcond;
6766 }
6767 #else
6768 {
6769 arma_ignore(A);
6770 arma_ignore(norm_val);
6771 return eT(0);
6772 }
6773 #endif
6774 }
6775
6776
6777
6778 template<typename T>
6779 inline
6780 T
lu_rcond(const Mat<std::complex<T>> & A,const T norm_val)6781 auxlib::lu_rcond(const Mat< std::complex<T> >& A, const T norm_val)
6782 {
6783 #if defined(ARMA_USE_LAPACK)
6784 {
6785 typedef typename std::complex<T> eT;
6786
6787 char norm_id = '1';
6788 blas_int n = blas_int(A.n_rows); // assuming square matrix
6789 blas_int lda = blas_int(A.n_rows);
6790 T rcond = T(0);
6791 blas_int info = blas_int(0);
6792
6793 podarray<eT> work(2*A.n_rows);
6794 podarray< T> rwork(2*A.n_rows);
6795
6796 arma_extra_debug_print("lapack::cx_gecon()");
6797 lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
6798
6799 if(info != blas_int(0)) { return T(0); }
6800
6801 return rcond;
6802 }
6803 #else
6804 {
6805 arma_ignore(A);
6806 arma_ignore(norm_val);
6807 return T(0);
6808 }
6809 #endif
6810 }
6811
6812
6813
6814 template<typename eT>
6815 inline
6816 eT
lu_rcond_sympd(const Mat<eT> & A,const eT norm_val)6817 auxlib::lu_rcond_sympd(const Mat<eT>& A, const eT norm_val)
6818 {
6819 #if defined(ARMA_USE_LAPACK)
6820 {
6821 char uplo = 'L';
6822 blas_int n = blas_int(A.n_rows); // assuming square matrix
6823 eT rcond = eT(0);
6824 blas_int info = blas_int(0);
6825
6826 podarray<eT> work(3*A.n_rows);
6827 podarray<blas_int> iwork( A.n_rows);
6828
6829 arma_extra_debug_print("lapack::pocon()");
6830 lapack::pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
6831
6832 if(info != blas_int(0)) { return eT(0); }
6833
6834 return rcond;
6835 }
6836 #else
6837 {
6838 arma_ignore(A);
6839 arma_ignore(norm_val);
6840 return eT(0);
6841 }
6842 #endif
6843 }
6844
6845
6846
6847 template<typename T>
6848 inline
6849 T
lu_rcond_sympd(const Mat<std::complex<T>> & A,const T norm_val)6850 auxlib::lu_rcond_sympd(const Mat< std::complex<T> >& A, const T norm_val)
6851 {
6852 #if defined(ARMA_CRIPPLED_LAPACK)
6853 {
6854 arma_ignore(A);
6855 arma_ignore(norm_val);
6856 return T(0);
6857 }
6858 #elif defined(ARMA_USE_LAPACK)
6859 {
6860 typedef typename std::complex<T> eT;
6861
6862 char uplo = 'L';
6863 blas_int n = blas_int(A.n_rows); // assuming square matrix
6864 T rcond = T(0);
6865 blas_int info = blas_int(0);
6866
6867 podarray<eT> work(2*A.n_rows);
6868 podarray< T> rwork( A.n_rows);
6869
6870 arma_extra_debug_print("lapack::cx_pocon()");
6871 lapack::cx_pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
6872
6873 if(info != blas_int(0)) { return T(0); }
6874
6875 return rcond;
6876 }
6877 #else
6878 {
6879 arma_ignore(A);
6880 arma_ignore(norm_val);
6881 return T(0);
6882 }
6883 #endif
6884 }
6885
6886
6887
6888 template<typename eT>
6889 inline
6890 eT
lu_rcond_band(const Mat<eT> & AB,const uword KL,const uword KU,const podarray<blas_int> & ipiv,const eT norm_val)6891 auxlib::lu_rcond_band(const Mat<eT>& AB, const uword KL, const uword KU, const podarray<blas_int>& ipiv, const eT norm_val)
6892 {
6893 #if defined(ARMA_USE_LAPACK)
6894 {
6895 const uword N = AB.n_cols; // order of the original square matrix A
6896
6897 char norm_id = '1';
6898 blas_int n = blas_int(N);
6899 blas_int kl = blas_int(KL);
6900 blas_int ku = blas_int(KU);
6901 blas_int ldab = blas_int(AB.n_rows);
6902 eT rcond = eT(0);
6903 blas_int info = blas_int(0);
6904
6905 podarray<eT> work(3*N);
6906 podarray<blas_int> iwork( N);
6907
6908 arma_extra_debug_print("lapack::gbcon()");
6909 lapack::gbcon<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), iwork.memptr(), &info);
6910
6911 if(info != blas_int(0)) { return eT(0); }
6912
6913 return rcond;
6914 }
6915 #else
6916 {
6917 arma_ignore(AB);
6918 arma_ignore(KL);
6919 arma_ignore(KU);
6920 arma_ignore(ipiv);
6921 arma_ignore(norm_val);
6922 return eT(0);
6923 }
6924 #endif
6925 }
6926
6927
6928
6929 template<typename T>
6930 inline
6931 T
lu_rcond_band(const Mat<std::complex<T>> & AB,const uword KL,const uword KU,const podarray<blas_int> & ipiv,const T norm_val)6932 auxlib::lu_rcond_band(const Mat< std::complex<T> >& AB, const uword KL, const uword KU, const podarray<blas_int>& ipiv, const T norm_val)
6933 {
6934 #if defined(ARMA_CRIPPLED_LAPACK)
6935 {
6936 arma_ignore(AB);
6937 arma_ignore(KL);
6938 arma_ignore(KU);
6939 arma_ignore(ipiv);
6940 arma_ignore(norm_val);
6941 return T(0);
6942 }
6943 #elif defined(ARMA_USE_LAPACK)
6944 {
6945 typedef typename std::complex<T> eT;
6946
6947 const uword N = AB.n_cols; // order of the original square matrix A
6948
6949 char norm_id = '1';
6950 blas_int n = blas_int(N);
6951 blas_int kl = blas_int(KL);
6952 blas_int ku = blas_int(KU);
6953 blas_int ldab = blas_int(AB.n_rows);
6954 T rcond = T(0);
6955 blas_int info = blas_int(0);
6956
6957 podarray<eT> work(2*N);
6958 podarray< T> rwork( N);
6959
6960 arma_extra_debug_print("lapack::cx_gbcon()");
6961 lapack::cx_gbcon<T>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), rwork.memptr(), &info);
6962
6963 if(info != blas_int(0)) { return T(0); }
6964
6965 return rcond;
6966 }
6967 #else
6968 {
6969 arma_ignore(AB);
6970 arma_ignore(KL);
6971 arma_ignore(KU);
6972 arma_ignore(ipiv);
6973 arma_ignore(norm_val);
6974 return T(0);
6975 }
6976 #endif
6977 }
6978
6979
6980
6981 template<typename T1>
6982 inline
6983 bool
crippled_lapack(const Base<typename T1::elem_type,T1> &)6984 auxlib::crippled_lapack(const Base<typename T1::elem_type, T1>&)
6985 {
6986 #if defined(ARMA_CRIPPLED_LAPACK)
6987 {
6988 arma_extra_debug_print("auxlib::crippled_lapack(): true");
6989
6990 return (is_cx<typename T1::elem_type>::yes);
6991 }
6992 #else
6993 {
6994 return false;
6995 }
6996 #endif
6997 }
6998
6999
7000
7001 template<typename T1>
7002 inline
7003 typename T1::pod_type
epsilon_lapack(const Base<typename T1::elem_type,T1> &)7004 auxlib::epsilon_lapack(const Base<typename T1::elem_type, T1>&)
7005 {
7006 typedef typename T1::pod_type T;
7007
7008 return T(0.5)*std::numeric_limits<T>::epsilon();
7009
7010 // value reverse engineered from dgesvx.f and dlamch.f
7011 // http://www.netlib.org/lapack/explore-html/da/d21/dgesvx_8f.html
7012 // http://www.netlib.org/lapack/explore-html/d5/dd4/dlamch_8f.html
7013 //
7014 // Fortran epsilon(X) function:
7015 // https://gcc.gnu.org/onlinedocs/gfortran/EPSILON.html
7016 // "EPSILON(X) returns the smallest number E of the same kind as X such that 1 + E > 1"
7017 //
7018 // C++ std::numeric_limits<T>::epsilon() function:
7019 // https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
7020 // "the difference between 1.0 and the next value representable by the floating-point type T"
7021 //
7022 // extract from dgesvx.f:
7023 //
7024 // IF( rcond.LT.dlamch( 'Epsilon' ) )
7025 // info = n + 1
7026 // RETURN
7027 //
7028 // extract from dlamch.f:
7029 //
7030 // * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise
7031 // ...
7032 // * Assume rounding, not chopping. Always
7033 //
7034 // rnd = one
7035 //
7036 // IF( one.EQ.rnd ) THEN
7037 // eps = epsilon(zero) * 0.5
7038 // ELSE
7039 // eps = epsilon(zero)
7040 // END IF
7041 // ...
7042 // IF( lsame( cmach, 'E' ) ) THEN
7043 // rmach = eps
7044 // ...
7045 // END IF
7046 // ...
7047 // dlamch = rmach
7048 // RETURN
7049 }
7050
7051
7052
7053 template<typename eT>
7054 inline
7055 bool
rudimentary_sym_check(const Mat<eT> & X)7056 auxlib::rudimentary_sym_check(const Mat<eT>& X)
7057 {
7058 arma_extra_debug_sigprint();
7059
7060 const uword N = X.n_rows;
7061 const uword Nm2 = N-2;
7062
7063 if(N != X.n_cols) { return false; }
7064 if(N <= uword(1)) { return true; }
7065
7066 const eT* X_mem = X.memptr();
7067
7068 const eT* X_offsetA = &(X_mem[Nm2 ]);
7069 const eT* X_offsetB = &(X_mem[Nm2*N]);
7070
7071 const eT A1 = *(X_offsetA );
7072 const eT A2 = *(X_offsetA+1); // bottom-left corner (ie. last value in first column)
7073 const eT B1 = *(X_offsetB );
7074 const eT B2 = *(X_offsetB+N); // top-right corner (ie. first value in last column)
7075
7076 const eT C1 = (std::max)(std::abs(A1), std::abs(B1));
7077 const eT C2 = (std::max)(std::abs(A2), std::abs(B2));
7078
7079 const eT delta1 = std::abs(A1 - B1);
7080 const eT delta2 = std::abs(A2 - B2);
7081
7082 const eT tol = eT(10000)*std::numeric_limits<eT>::epsilon(); // allow some leeway
7083
7084 const bool okay1 = ( (delta1 <= tol) || (delta1 <= (C1 * tol)) );
7085 const bool okay2 = ( (delta2 <= tol) || (delta2 <= (C2 * tol)) );
7086
7087 return (okay1 && okay2);
7088 }
7089
7090
7091
7092 template<typename T>
7093 inline
7094 bool
rudimentary_sym_check(const Mat<std::complex<T>> & X)7095 auxlib::rudimentary_sym_check(const Mat< std::complex<T> >& X)
7096 {
7097 arma_extra_debug_sigprint();
7098
7099 // NOTE: the function name is a misnomer, as it checks for hermitian complex matrices;
7100 // NOTE: for simplicity of use, the function name is the same as for real matrices
7101
7102 typedef typename std::complex<T> eT;
7103
7104 const uword N = X.n_rows;
7105 const uword Nm1 = N-1;
7106
7107 if(N != X.n_cols) { return false; }
7108 if(N == uword(0)) { return true; }
7109
7110 const eT* X_mem = X.memptr();
7111
7112 const T tol = T(10000)*std::numeric_limits<T>::epsilon(); // allow some leeway
7113
7114 if(std::abs(X_mem[0].imag()) > tol) { return false; }
7115
7116 const eT& A = X_mem[Nm1 ]; // bottom-left corner (ie. last value in first column)
7117 const eT& B = X_mem[Nm1*N]; // top-right corner (ie. first value in last column)
7118
7119 const T C_real = (std::max)(std::abs(A.real()), std::abs(B.real()));
7120 const T C_imag = (std::max)(std::abs(A.imag()), std::abs(B.imag()));
7121
7122 const T delta_real = std::abs(A.real() - B.real());
7123 const T delta_imag = std::abs(A.imag() + B.imag()); // take into account the conjugate
7124
7125 const bool okay_real = ( (delta_real <= tol) || (delta_real <= (C_real * tol)) );
7126 const bool okay_imag = ( (delta_imag <= tol) || (delta_imag <= (C_imag * tol)) );
7127
7128 return (okay_real && okay_imag);
7129 }
7130
7131
7132
7133 //
7134
7135
7136
7137 namespace qz_helper
7138 {
7139
7140 // sgges() and dgges() require an external function with three arguments:
7141 // select(alpha_real, alpha_imag, beta)
7142 // where the eigenvalue is defined as complex(alpha_real, alpha_imag) / beta
7143
7144 template<typename T>
7145 inline
7146 blas_int
select_lhp(const T * x_ptr,const T * y_ptr,const T * z_ptr)7147 select_lhp(const T* x_ptr, const T* y_ptr, const T* z_ptr)
7148 {
7149 arma_extra_debug_sigprint();
7150
7151 // cout << "select_lhp(): (*x_ptr) = " << (*x_ptr) << endl;
7152 // cout << "select_lhp(): (*y_ptr) = " << (*y_ptr) << endl;
7153 // cout << "select_lhp(): (*z_ptr) = " << (*z_ptr) << endl;
7154
7155 arma_ignore(y_ptr); // ignore imaginary part
7156
7157 const T x = (*x_ptr);
7158 const T z = (*z_ptr);
7159
7160 if(z == T(0)) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp
7161
7162 return ((x/z) < T(0)) ? blas_int(1) : blas_int(0);
7163 }
7164
7165
7166
7167 template<typename T>
7168 inline
7169 blas_int
select_rhp(const T * x_ptr,const T * y_ptr,const T * z_ptr)7170 select_rhp(const T* x_ptr, const T* y_ptr, const T* z_ptr)
7171 {
7172 arma_extra_debug_sigprint();
7173
7174 // cout << "select_rhp(): (*x_ptr) = " << (*x_ptr) << endl;
7175 // cout << "select_rhp(): (*y_ptr) = " << (*y_ptr) << endl;
7176 // cout << "select_rhp(): (*z_ptr) = " << (*z_ptr) << endl;
7177
7178 arma_ignore(y_ptr); // ignore imaginary part
7179
7180 const T x = (*x_ptr);
7181 const T z = (*z_ptr);
7182
7183 if(z == T(0)) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp
7184
7185 return ((x/z) > T(0)) ? blas_int(1) : blas_int(0);
7186 }
7187
7188
7189
7190 template<typename T>
7191 inline
7192 blas_int
select_iuc(const T * x_ptr,const T * y_ptr,const T * z_ptr)7193 select_iuc(const T* x_ptr, const T* y_ptr, const T* z_ptr)
7194 {
7195 arma_extra_debug_sigprint();
7196
7197 // cout << "select_iuc(): (*x_ptr) = " << (*x_ptr) << endl;
7198 // cout << "select_iuc(): (*y_ptr) = " << (*y_ptr) << endl;
7199 // cout << "select_iuc(): (*z_ptr) = " << (*z_ptr) << endl;
7200
7201 const T x = (*x_ptr);
7202 const T y = (*y_ptr);
7203 const T z = (*z_ptr);
7204
7205 if(z == T(0)) { return blas_int(0); } // consider an infinite eig value to be outside of the unit circle
7206
7207 //return (std::abs(std::complex<T>(x,y) / z) < T(1)) ? blas_int(1) : blas_int(0);
7208 return (std::sqrt(x*x + y*y) < std::abs(z)) ? blas_int(1) : blas_int(0);
7209 }
7210
7211
7212
7213 template<typename T>
7214 inline
7215 blas_int
select_ouc(const T * x_ptr,const T * y_ptr,const T * z_ptr)7216 select_ouc(const T* x_ptr, const T* y_ptr, const T* z_ptr)
7217 {
7218 arma_extra_debug_sigprint();
7219
7220 // cout << "select_ouc(): (*x_ptr) = " << (*x_ptr) << endl;
7221 // cout << "select_ouc(): (*y_ptr) = " << (*y_ptr) << endl;
7222 // cout << "select_ouc(): (*z_ptr) = " << (*z_ptr) << endl;
7223
7224 const T x = (*x_ptr);
7225 const T y = (*y_ptr);
7226 const T z = (*z_ptr);
7227
7228 if(z == T(0))
7229 {
7230 return (x == T(0)) ? blas_int(0) : blas_int(1); // consider an infinite eig value to be outside of the unit circle
7231 }
7232
7233 //return (std::abs(std::complex<T>(x,y) / z) > T(1)) ? blas_int(1) : blas_int(0);
7234 return (std::sqrt(x*x + y*y) > std::abs(z)) ? blas_int(1) : blas_int(0);
7235 }
7236
7237
7238
7239 // cgges() and zgges() require an external function with two arguments:
7240 // select(alpha, beta)
7241 // where the complex eigenvalue is defined as (alpha / beta)
7242
7243 template<typename T>
7244 inline
7245 blas_int
cx_select_lhp(const std::complex<T> * x_ptr,const std::complex<T> * y_ptr)7246 cx_select_lhp(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
7247 {
7248 arma_extra_debug_sigprint();
7249
7250 // cout << "cx_select_lhp(): (*x_ptr) = " << (*x_ptr) << endl;
7251 // cout << "cx_select_lhp(): (*y_ptr) = " << (*y_ptr) << endl;
7252
7253 const std::complex<T>& x = (*x_ptr);
7254 const std::complex<T>& y = (*y_ptr);
7255
7256 if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp
7257
7258 return (std::real(x / y) < T(0)) ? blas_int(1) : blas_int(0);
7259 }
7260
7261
7262
7263 template<typename T>
7264 inline
7265 blas_int
cx_select_rhp(const std::complex<T> * x_ptr,const std::complex<T> * y_ptr)7266 cx_select_rhp(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
7267 {
7268 arma_extra_debug_sigprint();
7269
7270 // cout << "cx_select_rhp(): (*x_ptr) = " << (*x_ptr) << endl;
7271 // cout << "cx_select_rhp(): (*y_ptr) = " << (*y_ptr) << endl;
7272
7273 const std::complex<T>& x = (*x_ptr);
7274 const std::complex<T>& y = (*y_ptr);
7275
7276 if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp
7277
7278 return (std::real(x / y) > T(0)) ? blas_int(1) : blas_int(0);
7279 }
7280
7281
7282
7283 template<typename T>
7284 inline
7285 blas_int
cx_select_iuc(const std::complex<T> * x_ptr,const std::complex<T> * y_ptr)7286 cx_select_iuc(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
7287 {
7288 arma_extra_debug_sigprint();
7289
7290 // cout << "cx_select_iuc(): (*x_ptr) = " << (*x_ptr) << endl;
7291 // cout << "cx_select_iuc(): (*y_ptr) = " << (*y_ptr) << endl;
7292
7293 const std::complex<T>& x = (*x_ptr);
7294 const std::complex<T>& y = (*y_ptr);
7295
7296 if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value to be outside of the unit circle
7297
7298 return (std::abs(x / y) < T(1)) ? blas_int(1) : blas_int(0);
7299 }
7300
7301
7302
7303 template<typename T>
7304 inline
7305 blas_int
cx_select_ouc(const std::complex<T> * x_ptr,const std::complex<T> * y_ptr)7306 cx_select_ouc(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr)
7307 {
7308 arma_extra_debug_sigprint();
7309
7310 // cout << "cx_select_ouc(): (*x_ptr) = " << (*x_ptr) << endl;
7311 // cout << "cx_select_ouc(): (*y_ptr) = " << (*y_ptr) << endl;
7312
7313 const std::complex<T>& x = (*x_ptr);
7314 const std::complex<T>& y = (*y_ptr);
7315
7316 if( (y.real() == T(0)) && (y.imag() == T(0)) )
7317 {
7318 return ((x.real() == T(0)) && (x.imag() == T(0))) ? blas_int(0) : blas_int(1); // consider an infinite eig value to be outside of the unit circle
7319 }
7320
7321 return (std::abs(x / y) > T(1)) ? blas_int(1) : blas_int(0);
7322 }
7323
7324
7325
7326 // need to do shenanigans with pointers due to:
7327 // - we're using LAPACK ?gges() defined to expect pointer-to-function to be passed as pointer-to-object
7328 // - explicit casting between pointer-to-function and pointer-to-object is a non-standard extension in C
7329 // - the extension is essentially mandatory on POSIX systems
7330 // - some compilers will complain about the extension in pedantic mode
7331
7332 template<typename T>
7333 inline
7334 void_ptr
ptr_cast(blas_int (* function)(const T *,const T *,const T *))7335 ptr_cast(blas_int (*function)(const T*, const T*, const T*))
7336 {
7337 union converter
7338 {
7339 blas_int (*fn)(const T*, const T*, const T*);
7340 void_ptr obj;
7341 };
7342
7343 converter tmp;
7344
7345 tmp.obj = 0;
7346 tmp.fn = function;
7347
7348 return tmp.obj;
7349 }
7350
7351
7352
7353 template<typename T>
7354 inline
7355 void_ptr
ptr_cast(blas_int (* function)(const std::complex<T> *,const std::complex<T> *))7356 ptr_cast(blas_int (*function)(const std::complex<T>*, const std::complex<T>*))
7357 {
7358 union converter
7359 {
7360 blas_int (*fn)(const std::complex<T>*, const std::complex<T>*);
7361 void_ptr obj;
7362 };
7363
7364 converter tmp;
7365
7366 tmp.obj = 0;
7367 tmp.fn = function;
7368
7369 return tmp.obj;
7370 }
7371
7372
7373
7374 } // end of namespace qz_helper
7375
7376
7377 //! @}
7378