1 /* _______________________________________________________________________
2
3 PECOS: Parallel Environment for Creation Of Stochastics
4 Copyright (c) 2011, Sandia National Laboratories.
5 This software is distributed under the GNU Lesser General Public License.
6 For more information, see the README file in the top Pecos directory.
7 _______________________________________________________________________ */
8
9 #include "linear_algebra.hpp"
10 #include "Teuchos_BLAS_wrappers.hpp"
11 #include "Teuchos_LAPACK_wrappers.hpp"
12 #include "Teuchos_SerialSpdDenseSolver.hpp"
13 #include "teuchos_data_types.hpp"
14 #define disp( a ) std::cout << #a << ": " << ( a ) << std::endl
15
16 // DLASWP and DGEQP3 are in Teuchos_LAPACK_wrappers.hpp
17 #define DPSTRF_F77 F77_BLAS_MANGLE(dpstrf,DPSTRF)
18 // unused:
19 // #define DGETC2_F77 F77_BLAS_MANGLE(dgetc2,DGETC2)
20 // #define DORGRQ_F77 F77_BLAS_MANGLE(dorgrq,DORGRQ)
21 extern "C"
22 {
23 void DPSTRF_F77( const char* UPLO, int *N, double *A,
24 int *LDA, int *PIV, int *RANK, double *TOL, double *WORK,
25 int *INFO );
26
27 // void DGETC2_F77( int *N, double *A, int *LDA, int *IPIV, int *JPIV, int *INFO );
28
29 // void DORGRQ_F77( int *M, int *N, int *K, double *A, int* LDA, double* TAU,
30 // double *WORK, const int *LWORK, int *INFO );
31 }
32
33 namespace Pecos {
34 namespace util {
35
GEMV(Teuchos::ETransp trans,bool conjugate_trans,double alpha,const Teuchos::SerialDenseMatrix<int,double> & matrix,const Teuchos::SerialDenseVector<int,double> & vector,double beta,Teuchos::SerialDenseVector<int,double> & result)36 template<> void GEMV<int,double>(
37 Teuchos::ETransp trans, bool conjugate_trans, double alpha,
38 const Teuchos::SerialDenseMatrix<int,double> &matrix,
39 const Teuchos::SerialDenseVector<int,double> &vector,
40 double beta,
41 Teuchos::SerialDenseVector<int,double> &result){
42
43 int result_len = matrix.numRows();
44 if ((trans==Teuchos::TRANS) || (trans==Teuchos::CONJ_TRANS))
45 result_len = matrix.numCols();
46
47 if (result.length() != result_len){
48 if (beta!=0.)
49 throw(std::runtime_error("result inconsistent with matrix but beta!=0"));
50 result.sizeUninitialized(result_len);
51 }
52
53 int inc=1;
54 char T=Teuchos::ETranspChar[trans];
55 int M=matrix.numRows(), N=matrix.numCols(), stride=matrix.stride();
56 DGEMV_F77( &T, &M, &N, &alpha,
57 matrix.values(), &stride,
58 vector.values(), &inc, &beta, result.values(), &inc );
59 }
60
61 // BMA: Disabled the complex variant since no current use cases
62 // When reactivated will need to edit Dakota and Pecos CMakeLists.txt
63 //#ifdef HAVE_COMPLEX_BLAS
64 //template<> void GEMV<int,std::complex<double> >(
65 // Teuchos::ETransp trans, bool conjugate_trans, std::complex<double> alpha,
66 // const Teuchos::SerialDenseMatrix<int,std::complex<double> >&matrix,
67 // const Teuchos::SerialDenseVector<int,std::complex<double> >&vector,
68 // std::complex<double> beta,
69 // Teuchos::SerialDenseVector<int,std::complex<double> > &result){
70 //
71 // int result_len = matrix.numRows();
72 // if ((trans==Teuchos::TRANS) || (trans==Teuchos::CONJ_TRANS))
73 // result_len = matrix.numCols();
74 //
75 // if (result.length() != result_len){
76 // if (beta!=0.)
77 // throw(std::runtime_error("result inconsistent with matrix but beta!=0"));
78 // result.sizeUninitialized(result_len);
79 // }
80 //
81 // if ((trans==Teuchos::TRANS) && conjugate_trans)
82 // trans = Teuchos::CONJ_TRANS;
83 //
84 // int inc=1;
85 // char T=Teuchos::ETranspChar[trans];
86 // int M=matrix.numRows(), N=matrix.numCols(), stride=matrix.stride();
87 // ZGEMV_F77( &T, &M, &N, &alpha,
88 // matrix.values(), &stride,
89 // vector.values(), &inc, &beta, result.values(), &inc );
90 //}
91 //#endif
92
cholesky(const RealMatrix & A,RealMatrix & result,Teuchos::EUplo uplo,bool for_lapack)93 int cholesky( const RealMatrix &A, RealMatrix &result, Teuchos::EUplo uplo,
94 bool for_lapack )
95 {
96 Teuchos::LAPACK<int, Real> la;
97 int M = A.numRows();
98 result.reshape( M, M );
99 result.assign( A );
100
101 int info;
102 la.POTRF( Teuchos::EUploChar[uplo], M, result.values(), result.stride(),
103 &info );
104
105 if ( info > 0 )
106 {
107 //std::cout << "cholesky() The matrix A is not positive definite\n";
108 return info;
109 }
110 if ( info < 0 )
111 {
112 std::stringstream msg;
113 msg << "cholesky() POTRF failed\n";
114 msg << "The " << std::abs( info ) << "-th argument had an ";
115 msg << "illegal value";
116 throw( std::runtime_error( msg.str() ) );
117 throw( std::runtime_error( msg.str() ) );
118 }
119
120 // lapack returns the lower/upper triangle of the (symmetric) inverse of A
121 // so we must use symmetry to fill in the entries above/below the diagonal
122 if ( !for_lapack )
123 {
124 if ( uplo == Teuchos::LOWER_TRI )
125 {
126 for ( int j = 1; j < M; j++ )
127 for ( int i = 0; i < j; i++ )
128 result(i,j) = 0.;
129 }
130 else
131 {
132 for ( int j = 1; j < M; j++ )
133 for ( int i = 0; i < j; i++ )
134 result(j,i) = 0.;
135 }
136 }
137
138 return info;
139 };
140
solve_using_cholesky_factor(const RealMatrix & L,const RealMatrix & B,RealMatrix & result,Teuchos::EUplo uplo)141 int solve_using_cholesky_factor( const RealMatrix &L, const RealMatrix& B,
142 RealMatrix& result, Teuchos::EUplo uplo )
143 {
144 Teuchos::LAPACK<int, Real> la;
145 int m( L.numRows() ), num_rhs( B.numCols() );
146 result.reshape( B.numRows(), num_rhs );
147 result.assign( B );
148
149 // Solves the system of linear equations A*X = B with a symmetric
150 // positive definite matrix A=LL' using the Cholesky factorization
151 int info;
152 la.POTRS( Teuchos::EUploChar[uplo], m, num_rhs,
153 L.values(), L.stride(),
154 result.values(), result.stride(), &info );
155 result.reshape( L.numRows(), num_rhs );
156
157 return info;
158 };
159
teuchos_cholesky_solve(RealMatrix & A,RealMatrix & B,RealMatrix & result,Real & rcond)160 int teuchos_cholesky_solve( RealMatrix &A, RealMatrix &B,
161 RealMatrix& result, Real &rcond ){
162 // Copy A so that it is not affected outside the scope of this function
163 RealSymMatrix chol_factor( A.numRows() );
164 for (int j=0; j<A.numRows(); j++)
165 {
166 chol_factor(j,j) = A(j,j);
167 for (int i=j+1; i<A.numRows();i++)
168 {
169 chol_factor(i,j) = A(i,j);
170 chol_factor(j,i) = A(i,j);
171 }
172 }
173 // Copy B so that it is not affected outside the scope of this function
174 RealMatrix rhs( Teuchos::Copy, B, B.numRows(), B.numCols() );
175
176 result.shapeUninitialized( rhs.numRows(), rhs.numCols() );
177 Teuchos::SerialSpdDenseSolver<int, Real> solver;
178 solver.setMatrix( rcp(&chol_factor, false) );
179 solver.setVectors( rcp(&result, false), rcp(&rhs, false) );
180 solver.factorWithEquilibration( true );
181 int info(0);
182 info = solver.factor();
183 //solver.solveToRefinedSolution( true );
184 info = solver.solve();
185 info = solver.reciprocalConditionEstimate( rcond );
186 return info;
187 }
188
cholesky_solve(const RealMatrix & A,const RealMatrix & B,RealMatrix & result,Real & rcond)189 int cholesky_solve(const RealMatrix& A, const RealMatrix& B,
190 RealMatrix& result, Real &rcond)
191 {
192 Teuchos::LAPACK<int, Real> la;
193
194 int m( A.numRows() );
195 RealMatrix L;
196 int info = cholesky( A, L, Teuchos::LOWER_TRI, true );
197
198 if ( info != 0 ) return info;
199
200 // Compute the reciprocal of the condition number of A = L*L**T or A = U**T*U
201 // from its cholesky decompostion computed by POTRF.
202 if ( rcond < 0 )
203 {
204 Real *work = new Real [3*m]; // workspace array
205 int *iwork = new int [m]; // workspace array
206 la.POCON( Teuchos::EUploChar[Teuchos::LOWER_TRI], m, L.values(),
207 L.stride(), A.normOne(), &rcond, work, iwork, &info );
208 delete [] work;
209 delete [] iwork;
210 if ( info < 0 )
211 {
212 std::cout << "cholesky_solve() Incorrect arguments specified to ";
213 std::cout << "POCON()\n";
214 return info;
215 }
216 }
217
218 info = solve_using_cholesky_factor( L, B, result, Teuchos::LOWER_TRI );
219
220 return info;
221 };
222
qr_factorization(const RealMatrix & A,RealMatrix & Qfactor,RealMatrix & Rfactor)223 void qr_factorization( const RealMatrix &A, RealMatrix &Qfactor,
224 RealMatrix &Rfactor ){
225 Teuchos::LAPACK<int, Real> la;
226 int M( A.numRows() ), N( A.numCols() );
227 int K( std::min(M,N) );
228 RealMatrix qr_data( Teuchos::Copy, A, M, N );
229 /*RealMatrix qr_data( M, std::max( M, N ), false );
230 // Copy A into qr_data but if overdetermined allocate enough memory
231 // to store Q when passed to dorgrq.
232 for (int j=0; j<N; j++)
233 for (int i=0; i<M; i++)
234 qr_data(i,j) = A(i,j);*/
235
236 //qr_data.shapeUninitialized( M, N );
237 //qr_data.assign( A );
238
239 //---------------------------------//
240 // Get the optimal work array size //
241 //---------------------------------//
242
243 int lwork; // Size of Teuchos::LAPACK work array
244 Real *work; // Teuchos::LAPACK work array
245 int info; // Teuchos::LAPACK output flag
246 lwork = -1; // special code for workspace query
247 work = new Real [1]; // temporary work array
248 //tau.sizeUninitialized( K ) ;// scalar factors of reflectors
249 RealVector tau( K, false );
250 la.GEQRF( M, N, qr_data.values(), qr_data.stride(), tau.values(),
251 work, lwork, &info );
252 lwork = (int)work[0]; // optimal work array size returned by query
253 delete [] work;
254
255 //------------------------------//
256 // Compute the QR factorization //
257 //------------------------------//
258 work = new Real [lwork]; // Optimal work array
259 la.GEQRF( M, N, qr_data.values(), qr_data.stride(), tau.values(),
260 work, lwork, &info );
261 delete [] work;
262
263 //-------------------//
264 // Form the R factor //
265 //-------------------//
266
267 Rfactor.shape( M, N ); // initialize to zero
268 for (int j=0; j<N; j++)
269 for (int i=0; i<=std::min(j,M-1); i++)
270 Rfactor(i,j) = qr_data(i,j);
271
272 //-------------------//
273 // Form the Q factor //
274 //-------------------//
275
276 /*No economoy version.
277 "economy size" R. If m>n, R has only n rows.
278 If m<=n, this is the same as R = qr(A).
279
280 // Note ORGQR give the equivalent of Q from qr(econ) in matlab
281 // that is only the first N columns.
282 // But I want all M columns so I use ORMQR to multiply the MxM identity by Q
283 Qfactor.shape( M, M );; // initialize to zero
284 for (int i=0; i<M; i++)
285 Qfactor(i,i) = 1.;
286
287 // query optimal work size
288 lwork = -1; // special code for workspace query
289 work = new Real [1]; // temporary work array
290 int LDA = qr_data.stride();
291 la.ORMQR( 'L', 'N', M, M, K, qr_data.values(), LDA,
292 tau.values(), Qfactor.values(), Qfactor.stride(),
293 work, lwork, &info );
294 lwork = (int)work[0]; // optimal work array size returned by query
295 delete [] work;
296
297 work = new Real [lwork]; // Optimal work array
298 la.ORMQR( 'L', 'N', M, M, K, qr_data.values(), LDA,
299 tau.values(), Qfactor.values(), Qfactor.stride(),
300 work, lwork, &info );
301 delete [] work;
302 */
303
304 // NESTA needs economy version
305 Qfactor.shape( M, N );; // initialize to zero
306 for (int i=0; i<M; i++)
307 Qfactor(i,i) = 1.;
308
309 // query optimal work size
310 lwork = -1; // special code for workspace query
311 work = new Real [1]; // temporary work array
312 la.ORGQR( M, N, tau.length(), qr_data.values(), qr_data.stride(),
313 tau.values(), work, lwork, &info );
314 lwork = (int)work[0]; // optimal work array size returned by query
315 delete [] work;
316
317 work = new Real [lwork]; // Optimal work array
318 la.ORGQR( M, N, tau.length(), qr_data.values(), qr_data.stride(),
319 tau.values(), work, lwork, &info );
320 delete [] work;
321
322 Qfactor.shapeUninitialized( M, N );
323 for (int j=0; j<N; j++)
324 for (int i=0; i<M; i++)
325 Qfactor(i,j) = qr_data(i,j);
326
327
328
329 }
330
331 /*void qr_solve( const RealMatrix &B, const RealMatrix &qr_data,
332 const RealVector &tau, RealMatrix &result ){
333 Teuchos::LAPACK<int, Real> la;
334 int M( B.numRows() ), num_rhs( B.numCols() );
335 int K = tau.length();
336 RealMatrix QtB( Teuchos::Copy, B, M, num_rhs );
337
338 // query optimal work size
339 int info ;
340 int lwork = -1; // special code for workspace query
341 Real *work = new Real [1]; // temporary work array
342 la.ORMQR( 'L', 'T', M, num_rhs, K, qr_data.values(),
343 qr_data.stride(), tau.values(), QtB.values(), QtB.stride(),
344 work, lwork, &info );
345 lwork = (int)work[0]; // optimal work array size returned by query
346 delete [] work;
347 work = new Real [lwork]; // Optimal work array
348 la.ORMQR( 'L', 'T', M, num_rhs, K, qr_data.values(),
349 qr_data.stride(), tau.values(), QtB.values(), QtB.stride(),
350 work, lwork, &info );
351
352 substitution_solve( qr_data, QtB, result, Teuchos::NO_TRANS,
353 Teuchos::UPPER_TRI, Teuchos::NON_UNIT_DIAG );
354 delete [] work;
355 }*/
356
qr_solve(const RealMatrix & A,const RealMatrix & B,RealMatrix & result,Teuchos::ETransp trans)357 void qr_solve( const RealMatrix &A, const RealMatrix &B, RealMatrix &result,
358 Teuchos::ETransp trans )
359 {
360 Teuchos::LAPACK<int, Real> la;
361
362 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
363 int M( A.numRows() ), N( A.numCols() ), num_rhs( B.numCols() );
364 RealMatrix B_copy( Teuchos::Copy, B, B.numRows(), B.numCols() );;
365 B_copy.reshape( std::max( M, N ), num_rhs );
366 //---------------------------------//
367 // Get the optimal work array size //
368 //---------------------------------//
369
370 int lwork; // Size of Teuchos::LAPACK work array
371 Real *work; // Teuchos::LAPACK work array
372 int info=0; // Teuchos::LAPACK output flag
373 int lda = A_copy.stride();
374 int ldb = B_copy.stride();
375
376 lwork = -1; // special code for workspace query
377 work = new Real [1]; // temporary work array
378 la.GELS( Teuchos::ETranspChar[trans], M, N, num_rhs, A_copy.values(),
379 lda, B_copy.values(), ldb, work, lwork, &info );
380 lwork = (int)work[0]; // optimal work array size returned by query
381 delete [] work;
382 work = new Real [lwork]; // Optimal work array
383
384 //---------------------------------//
385 // Solve Ax = b //
386 //---------------------------------//
387 la.GELS( Teuchos::ETranspChar[trans], M, N, num_rhs, A_copy.values(), lda,
388 B_copy.values(), ldb, work, lwork, &info );
389 if ( info < 0 )
390 {
391 std::stringstream msg;
392 msg << "qr_solve() dgels failed. ";
393 msg << "The " << std::abs( info ) << "-th argument had an ";
394 msg << "illegal value";
395 throw( std::runtime_error( msg.str() ) );
396 }
397 if ( info > 0 )
398 {
399 std::stringstream msg;
400 msg << "QR Solve dgels failed. ";
401 msg << "The " << info << "-th diagonal element of the ";
402 msg << "triangular factor of A is zero, so that A does not have";
403 msg << "full rank; the least squares solution could not be computed.";
404 throw( std::runtime_error( msg.str() ) );
405 }
406 delete [] work;
407 result.reshape( N, num_rhs );
408 for ( int j = 0; j < num_rhs; j++ )
409 for ( int i = 0; i < N; i++ )
410 result(i,j) = B_copy(i,j);
411 };
412
413
svd_solve(const RealMatrix & A,const RealMatrix & B,RealMatrix & result_0,RealVector & result_1,int & rank,Real rcond)414 void svd_solve( const RealMatrix &A, const RealMatrix &B, RealMatrix &result_0,
415 RealVector &result_1, int &rank, Real rcond )
416 {
417 Teuchos::LAPACK<int, Real> la;
418
419 //-----------------//
420 // Allocate memory //
421 //-----------------//
422
423 int M( A.numRows() ), N( A.numCols() ), num_rhs( B.numCols() );
424
425 if (num_rhs<1)
426 throw(std::runtime_error("B has zero columns"));
427
428 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
429 result_1.sizeUninitialized( std::min( M, N ) );
430
431 //---------------------------------//
432 // Get the optimal work array size //
433 //---------------------------------//
434
435 int lwork; // Size of Teuchos::LAPACK work array
436 Real *work; // Teuchos::LAPACK work array
437 int info; // Teuchos::LAPACK output flag
438 int lda = A_copy.stride();
439 int ldb = std::max( std::max( B.stride(), lda ), N );
440
441 result_0.shapeUninitialized( M, num_rhs );
442 result_0.assign( B );
443 result_0.reshape( ldb, num_rhs );
444
445 lwork = -1; // special code for workspace query
446 work = new Real [1]; // temporary work array
447 la.GELSS( M, N, num_rhs, A_copy.values(), lda, result_0.values(), ldb,
448 result_1.values(), rcond, &rank, work, lwork, &info );
449 lwork = (int)work[0]; // optimal work array size returned by query
450
451 delete [] work;
452 work = new Real [lwork]; // Optimal work array
453
454 //---------------------------------//
455 // Solve Ax = b //
456 //---------------------------------//
457 la.GELSS( M, N, num_rhs, A_copy.values(), lda, result_0.values(), ldb,
458 result_1.values(), rcond, &rank, work, lwork, &info );
459
460 result_0.reshape( N, num_rhs );
461 delete [] work;
462 };
463
cholesky_factorization_update_insert_column(RealMatrix & A,RealMatrix & U,RealMatrix & col,int iter,Real delta)464 int cholesky_factorization_update_insert_column( RealMatrix &A, RealMatrix &U,
465 RealMatrix &col, int iter,
466 Real delta )
467 {
468 int info( 0 );
469 Real col_norm = col.normFrobenius();
470 if ( iter == 0 )
471 {
472 U(0,0) = std::sqrt( col_norm * col_norm + delta );
473 }
474 else
475 {
476 if ( iter >= U.numRows() )
477 throw( std::runtime_error("cholesky_factorization_update_insert_column: iter out of bounds") );
478
479 RealMatrix w;//( iter, 1, false );
480 RealMatrix U_old( Teuchos::View, U, iter, iter, 0, 0 );
481 // compute column k in gramian matrix A'A
482 RealMatrix gramian_col( iter, 1, false );
483 gramian_col.multiply( Teuchos::TRANS, Teuchos::NO_TRANS,
484 1.0, A, col, 0.0 );
485 substitution_solve( U_old, gramian_col, w, Teuchos::TRANS,
486 Teuchos::UPPER_TRI );
487 Real w_norm = w.normFrobenius();
488
489 if ( col_norm * col_norm + delta - w_norm*w_norm <=
490 std::numeric_limits<Real>::epsilon() )
491 {
492 // New column is colinear. That is, it is in the span of the active
493 // set
494 info = 1;
495 }
496 else
497 {
498 U(iter,iter) = std::sqrt(( col_norm*col_norm + delta )-w_norm*w_norm );
499 RealMatrix U_col( Teuchos::View, U, iter, 1, 0, iter );
500 U_col.assign( w );
501 }
502 }
503 return info;
504 };
505
givens_rotation(RealVector & x,RealVector & x_rot,RealMatrix & givens_matrix)506 void givens_rotation( RealVector &x, RealVector &x_rot, RealMatrix &givens_matrix )
507 {
508 givens_matrix.reshape( 2, 2 );
509 x_rot.sizeUninitialized( x.length() );
510
511 if ( x[1] == 0 )
512 {
513 //givens_matrix = I
514 givens_matrix(0,0) = 1.0; givens_matrix(1,1) = 1.0;
515 x_rot.assign( x );
516 }
517 else
518 {
519 Real x_norm = x.normFrobenius();
520
521 givens_matrix(0,0) = x[0] / x_norm;
522 givens_matrix(0,1) = x[1] / x_norm;
523 givens_matrix(1,0) = -x[1] / x_norm;
524 givens_matrix(1,1) = x[0] / x_norm;
525
526 x_rot[0] = x_norm;
527 x_rot[1] = 0.0;
528 }
529 };
530
cholesky_factorization_update_delete_column(RealMatrix & U,int col_index,int N)531 void cholesky_factorization_update_delete_column( RealMatrix &U,
532 int col_index,
533 int N )
534 {
535 if ( col_index != N - 1 )
536 {
537 // delete column but do not resize the matrix U
538 delete_column( col_index, U, false );
539 };
540
541 RealVector x( 2, false );
542 for ( int n = col_index; n < N-1; n++ )
543 {
544 RealMatrix givens_matrix;
545 RealVector x_rot;
546 x[0] = U(n,n); x[1] = U(n+1,n);
547 givens_rotation( x, x_rot, givens_matrix );
548 U(n,n) = x_rot[0]; U(n+1,n) = x_rot[1];
549 if ( n < N - 2 )
550 {
551 RealMatrix U_sub( Teuchos::View, U, 2, N - n - 1, n, n + 1 );
552 RealMatrix U_sub_rot( U_sub.numRows(), U_sub.numCols(), false );
553 U_sub_rot.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
554 1.0, givens_matrix, U_sub, 0.0 );
555 U_sub.assign( U_sub_rot );
556 }
557 }
558
559 // Zero out last row and column of U
560 for ( int m = 0; m < N; m++ ) U(m,N-1) = 0.0;
561 for ( int n = 0; n < N; n++ ) U(N-1,n) = 0.0;
562 };
563
conjugate_gradients_solve(const RealMatrix & A,const RealVector & b,RealVector & x,Real & relative_residual_norm,int & iters_taken,Real cg_tol,int max_iter,int verbosity)564 int conjugate_gradients_solve( const RealMatrix &A, const RealVector &b, RealVector &x,
565 Real &relative_residual_norm,
566 int &iters_taken,
567 Real cg_tol, int max_iter,
568 int verbosity )
569 {
570 int info( 0 );
571
572 int M( A.numRows() ), N( A.numCols() );
573
574 if ( x.length() != N )
575 // Initialize to zero
576 x.size( N );
577
578 RealVector current_x(Teuchos::Copy, x.values(), x.length());
579
580
581 Real b_norm = b.normFrobenius();
582 RealVector residual(Teuchos::Copy, b.values(), b.length());
583 residual.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
584 -1.0, A, current_x, 1.0 );
585
586 if ( b_norm < std::numeric_limits<Real>::epsilon() )
587 b_norm = 1.0;
588
589 relative_residual_norm = residual.normFrobenius() / b_norm ;
590
591 if ( Teuchos::ScalarTraits<Real>::isnaninf( relative_residual_norm ) )
592 {
593 // At least one of the matrix inputs contains nan of inf
594 if ( verbosity > 2 )
595 {
596 std::stringstream msg;
597 msg << "conjugate_gradient_solve() Warning: at least one of the ";
598 msg << "matrix inputs contains nan and/or inf.\n";
599 std::cout << msg.str();
600 }
601 info = 3;
602 return info;
603 }
604
605 if ( relative_residual_norm <= cg_tol )
606 {
607 info = 0;
608 return info;
609 }
610
611 if ( verbosity > 2 )
612 {
613 std::cout << "CG iteration: " << 0 << ", residual: ";
614 std::cout << relative_residual_norm << "\n";
615 }
616
617 RealVector p( residual );
618 RealVector Ap( A.numRows() );
619
620 iters_taken = 0;
621 bool done( false );
622 Real rtr_old( residual.dot( residual ) );
623
624 while ( !done )
625 {
626 Ap.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
627 1.0, A, p, 0.0 );
628
629 Real ptAp = p.dot( Ap );
630
631 Real alpha = rtr_old / ptAp;
632
633 if ( Teuchos::ScalarTraits<Real>::isnaninf( alpha ) || ( ptAp <= 0 ) )
634 {
635 if ( verbosity > 2 )
636 {
637 // The matrix is not positive definite
638 std::stringstream msg;
639 msg << "conjugate_gradient_solve() Warning: A is not postive ";
640 msg << "definite.\n";
641 std::cout << msg.str();
642 }
643 info = 2;
644 return info;
645 }
646
647 for ( int n = 0; n < N; n++ )
648 current_x[n] += alpha * p[n];
649
650 if ( (iters_taken+1)%50 == 0 )
651 {
652 // Avoid numerical drift due to rounding error
653 residual.assign( b );
654 residual.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
655 -1.0, A, current_x, 1.0 );
656 }
657 else
658 {
659 for ( int m = 0; m < M; m++ )
660 residual[m] -= alpha * Ap[m];
661 }
662
663 Real rtr = residual.dot( residual );
664
665 for ( int m = 0; m < M; m++ )
666 p[m] = residual[m] + rtr / rtr_old * p[m];
667
668 rtr_old = rtr;
669
670 Real current_relative_residual_norm = std::sqrt( rtr_old ) / b_norm;
671
672 if ( current_relative_residual_norm < relative_residual_norm )
673 {
674 relative_residual_norm = current_relative_residual_norm;
675 for ( int n = 0; n < N; n++ )
676 x[n] = current_x[n];
677 }
678
679 if ( verbosity > 2 )
680 {
681 std::cout << "CG iteration: " << iters_taken + 1<< ", residual: ";
682 std::cout << current_relative_residual_norm << "\n";
683 }
684
685 if ( current_relative_residual_norm < cg_tol )
686 {
687 if ( verbosity > 2 )
688 {
689 std::stringstream msg;
690 msg << "conjugate_gradient_solve() Exiting residual below ";
691 msg << "tolerance.\n";
692 std::cout << msg.str();
693 }
694 done = true;
695 info = 0;
696 }
697
698 iters_taken++;
699 if ( iters_taken == max_iter )
700 {
701 if ( verbosity > 2 )
702 {
703 std::stringstream msg;
704 msg << "conjugate_gradient_solve() Exiting maximum number of ";
705 msg << "iterations reached.\n";
706 std::cout << msg.str();
707 }
708 done = true;
709 info = 1;
710 }
711 }
712 return info;
713 };
714
715
equality_constrained_least_squares_solve(const RealMatrix & A,const RealVector & b,const RealMatrix & C,const RealVector & d,RealVector & x,int verbosity)716 void equality_constrained_least_squares_solve( const RealMatrix &A,
717 const RealVector &b,
718 const RealMatrix &C,
719 const RealVector &d,
720 RealVector &x,
721 int verbosity )
722 {
723 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() ),
724 C_copy( Teuchos::Copy, C, C.numRows(), C.numCols() );
725 RealVector b_copy( Teuchos::Copy, b.values(), b.length() ),
726 d_copy( Teuchos::Copy, d.values(), d.length() );
727
728 int M( A_copy.numRows() ), N( A_copy.numCols() ), lda( A_copy.stride() ),
729 ldc( C_copy.stride() );
730
731 x.sizeUninitialized( N );
732
733 Teuchos::LAPACK<int, Real> la;
734 double* work; // LAPACK work array
735 int info( 0 ); // LAPACK output flag
736 int lwork; // size of LAPACK work array
737 int num_cons( C_copy.numRows() ); // number of equality constraints
738
739 // Get the optimal work array size
740 lwork = -1; // special code for workspace query
741 work = new double [1]; // temporary work array
742 la.GGLSE( M, N, num_cons, A_copy.values(), lda,
743 C_copy.values(), ldc, b_copy.values(), d_copy.values(), x.values(),
744 work, lwork, &info );
745 lwork = (int)work[0]; // optimal work array size returned by query
746 delete [] work;
747 work = new double [lwork]; // Optimal work array
748
749 // Least squares computation using LAPACK's DGGLSE subroutine which uses
750 // a GRQ factorization method for solving the eq-constrained LLS problem
751 info = 0;
752 la.GGLSE( M, N, num_cons, A_copy.values(), lda, C_copy.values(),
753 ldc, b_copy.values(), d_copy.values(), x.values(),
754 work, lwork, &info );
755 delete [] work;
756
757 if ( info < 0 )
758 {
759 std::stringstream msg;
760 msg << "equality_constrained_least_squares() dgglse failed. ";
761 msg << "The " << std::abs( info ) << "-th argument had an ";
762 msg << "illegal value";
763 throw( std::runtime_error( msg.str() ) );
764 }
765 if ( info == 1 )
766 {
767 std::stringstream msg;
768 msg << "the upper triangular factor R associated with C in the ";
769 msg << "generalized RQ factorization of the pair (C, A) is ";
770 msg << "singular, so that rank(C) < num_cons; the least squares ";
771 msg << "solution could not be computed.";
772 throw( std::runtime_error( msg.str() ) );
773 }
774 if ( info == 2 )
775 {
776 std::stringstream msg;
777 msg << "the (N-P) by (N-P) part of the upper trapezoidal factor ";
778 msg << "T associated with A in the generalized RQ factorization ";
779 msg << "of the pair (C, A) is singular, so that\n";
780 msg << "rank( (A) ) < N; the least squares solution could not\n";
781 msg << " ( (C) )\n";
782 msg << "be computed.";
783 throw( std::runtime_error( msg.str() ) );
784 }
785 }
786
cholesky_inverse(RealMatrix & U,RealMatrix & result,Teuchos::EUplo uplo)787 void cholesky_inverse( RealMatrix &U, RealMatrix &result, Teuchos::EUplo uplo )
788 {
789 Teuchos::LAPACK<int, Real> la;
790 int N = U.numRows();
791 // copy U into result. Lapack will compute result inplace
792 result.shapeUninitialized( N, N );
793 result.assign( U );
794
795 // extract arguments needed for lapack call
796 int lda( result.stride() );
797 int info( 0 );
798 la.POTRI( Teuchos::EUploChar[uplo], N, result.values(), lda, &info );
799
800 if ( info < 0 )
801 {
802 std::stringstream msg;
803 msg << "cholesky_inverse() dpotri failed. ";
804 msg << "The " << std::abs( info ) << "-th argument had an ";
805 msg << "illegal value";
806 throw( std::runtime_error( msg.str() ) );
807 }
808 else if ( info > 0 )
809 {
810 std::stringstream msg;
811 msg << "cholesky_inverse() dpotri failed. ";
812 msg << "The (" << info << "," << info << ") element of the factor U or L is ";
813 msg << "zero and the inverse could not be computed";
814 throw( std::runtime_error( msg.str() ) );
815 }
816
817 // lapack returns the lower/upper triangle of the (symmetric) inverse of A
818 // so we must use symmetry to fill in the entries above/below the diagonal
819 if ( uplo == Teuchos::LOWER_TRI )
820 {
821 for ( int j = 1; j < N; j++ )
822 for ( int i = 0; i < j; i++ )
823 result(i,j) = result(j,i);
824 }
825 else
826 {
827 for ( int j = 1; j < N; j++ )
828 for ( int i = 0; i < j; i++ )
829 result(j,i) = result(i,j);
830 }
831 };
832
pivoted_qr_factorization(const RealMatrix & A,RealMatrix & Q,RealMatrix & R,IntVector & p)833 void pivoted_qr_factorization( const RealMatrix &A, RealMatrix &Q, RealMatrix &R,
834 IntVector &p )
835 {
836 Teuchos::LAPACK<int, Real> la;
837
838 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
839 int M( A.numRows() ), N( A.numCols() ), K( std::min( M, N ) );
840
841 //Q.shapeUninitialized( M, K );
842 Q.shape( M, K );
843 R.shape( K, N ); // must be initialized to 0
844 p.size( N ); // must be initialized to 0
845
846 //---------------------------------//
847 // Get the optimal work array size //
848 //---------------------------------//
849
850 int lwork; // Size of Teuchos::LAPACK work array
851 Real *work; // Teuchos::LAPACK work array
852 int info; // Teuchos::LAPACK output flag
853 int lda = std::max( 1, A_copy.stride() );
854 RealVector tau( K, true );
855
856 lwork = -1; // special code for workspace query
857 work = new Real [1]; // temporary work array
858
859 DGEQP3_F77( &M, &N, A_copy.values(), &lda, p.values(), tau.values(),
860 work, &lwork, &info );
861
862 lwork = (int)work[0]; // optimal work array size returned by query
863 delete [] work;
864 work = new Real [lwork]; // Optimal work array
865
866 //---------------------------------//
867 // Compute the QR factorization //
868 //---------------------------------//
869
870 DGEQP3_F77( &M, &N, A_copy.values(), &lda, p.values(), tau.values(),
871 work, &lwork, &info );
872
873 if ( info < 0 )
874 {
875 std::stringstream msg;
876 msg << "privoted_qr_factorization() dgeqp3 failed. ";
877 msg << "The " << std::abs( info ) << "-th argument had an ";
878 msg << "illegal value";
879 throw( std::runtime_error( msg.str() ) );
880 }
881
882 delete [] work;
883
884 //---------------------------------//
885 // Form the Q and R matrices //
886 //---------------------------------//
887
888 for ( int m = 0; m < K; m++ )
889 {
890 for ( int n = m; n < N; n++ )
891 R(m,n) = A_copy(m,n);
892 }
893
894 lwork = -1; // special code for workspace query
895 work = new Real [1]; // temporary work array
896
897 la.ORGQR( M, K, K, A_copy.values(), lda, tau.values(),
898 work, lwork, &info );
899
900 lwork = (int)work[0]; // optimal work array size returned by query
901 delete [] work;
902
903 work = new Real [lwork]; // Optimal work array
904
905 la.ORGQR( M, K, K, A_copy.values(), lda, tau.values(),
906 work, lwork, &info );
907
908 for ( int n = 0; n < K; n++ )
909 {
910 for ( int m = 0; m < M; m++ )
911 {
912 Q(m,n) = A_copy(m,n);
913 }
914 }
915
916 // fortran returns indices 1,...,N
917 // c++ requires 0,...,N-1
918 for ( int n = 0; n < N; n++ )
919 p[n]--;
920
921 delete [] work;
922 }
923
lu_inverse(const RealMatrix & L,const RealMatrix & U,const IntVector & p,RealMatrix & LU_inv)924 void lu_inverse( const RealMatrix &L, const RealMatrix &U, const IntVector &p,
925 RealMatrix &LU_inv )
926 {
927 int M = L.numRows(), N = U.numCols();
928 #ifdef DEBUG
929 if ( M != N )
930 {
931 std::string msg="LU_inverse() The dimensions of L and U are inconsistent";
932 throw( std::runtime_error( msg ) );
933 }
934 #endif
935 LU_inv.shape( M, M );
936 RealMatrix I;
937 eye( M, I );
938 if ( p.length() != 0 )
939 permute_matrix_columns( I, p );
940 RealMatrix X;
941 substitution_solve( L, I, X, Teuchos::NO_TRANS,
942 Teuchos::LOWER_TRI, Teuchos::NON_UNIT_DIAG );
943 substitution_solve( U, X, LU_inv, Teuchos::NO_TRANS,
944 Teuchos::UPPER_TRI, Teuchos::NON_UNIT_DIAG );
945 };
946
lu_solve(RealMatrix & A,const RealMatrix & B,RealMatrix & result,bool copy,Teuchos::ETransp trans)947 void lu_solve( RealMatrix &A,
948 const RealMatrix &B,
949 RealMatrix &result,
950 bool copy,
951 Teuchos::ETransp trans ){
952 Teuchos::LAPACK<int, Real> la;
953 int M = A.numRows(), N = A.numCols();
954
955 RealMatrix A_copy;
956 if ( copy )
957 {
958 A_copy.shapeUninitialized( M, N );
959 A_copy.assign( A );
960 }
961
962 IntVector ipiv( std::min( M, N ), false );
963 int info;
964 if ( copy )
965 la.GETRF( M, N, A_copy.values(), A_copy.stride(), ipiv.values(), &info );
966 else
967 la.GETRF( M, N, A.values(), A.stride(), ipiv.values(), &info );
968
969 if ( info < 0 )
970 {
971 std::stringstream msg;
972 msg << "GETRF: The " << std::abs(info) << "ith argument had " <<
973 "an illegal value";
974 throw( std::runtime_error( msg.str() ) );
975 }
976 if ( info > 0 )
977 {
978 std::stringstream msg;
979 msg << "U(" << info << "," << info << ") is exactly zero. " <<
980 "The factorization has been completed, but the factor U is exactly " <<
981 "singular, and division by zero will occur if it is used " <<
982 "to solve a system of equations";
983 throw( std::runtime_error( msg.str() ) );
984 }
985 result.shapeUninitialized( B.numRows(), B.numCols() );
986 result.assign( B );
987
988 if ( copy )
989 la.GETRS( Teuchos::ETranspChar[trans], M, result.numCols(), A_copy.values(),
990 A_copy.stride(), ipiv.values(), result.values(), result.stride(),
991 &info );
992 else
993 la.GETRS( Teuchos::ETranspChar[trans], M, result.numCols(), A.values(),
994 A.stride(), ipiv.values(), result.values(), result.stride(),
995 &info );
996
997
998 if ( info < 0 )
999 {
1000 std::stringstream msg;
1001 msg << "GETRS: The " <<std::abs(info) << "ith argument had " <<
1002 "an illegal value";
1003 throw( std::runtime_error( msg.str() ) );
1004 }
1005 };
1006
pivoted_lu_factorization(const RealMatrix & A,RealMatrix & L,RealMatrix & U,IntVector & pivots)1007 void pivoted_lu_factorization( const RealMatrix &A,
1008 RealMatrix &L,
1009 RealMatrix &U,
1010 IntVector &pivots )
1011 {
1012 Teuchos::LAPACK<int, Real> la;
1013
1014 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1015 int M( A.numRows() ), N( A.numCols() ), K( std::min( M, N ) );
1016
1017 int info;
1018 IntVector la_pivots( K, false );
1019 la.GETRF( M, N, A_copy.values(), A_copy.stride(), la_pivots.values(), &info );
1020
1021 if ( info < 0 )
1022 {
1023 std::stringstream msg;
1024 msg << "GETRF: The " << std::abs(info) << "ith argument had " <<
1025 "an illegal value";
1026 throw( std::runtime_error( msg.str() ) );
1027 }
1028 if ( info > 0 )
1029 {
1030 std::stringstream msg;
1031 msg << "U(" << info << "," << info << ") is exactly zero. " <<
1032 "The factorization has been completed, but the factor U is exactly " <<
1033 "singular, and division by zero will occur if it is used " <<
1034 "to solve a system of equations";
1035 throw( std::runtime_error( msg.str() ) );
1036 }
1037
1038 L.shape( M, K );
1039 U.shape( K, N );
1040
1041 // Fill U
1042 for ( int n = 0; n < N; n++ )
1043 {
1044 if ( n < K ) U(n,n) = A_copy(n,n);
1045 for ( int m = 0; m < std::min(n,K); m++ )
1046 U(m,n) = A_copy(m,n);
1047 }
1048
1049 for ( int n = 0; n < K; n++ )
1050 {
1051 L(n,n) = 1.;
1052 for ( int m = n+1; m < M; m++ )
1053 L(m,n) = A_copy(m,n);
1054 }
1055
1056 // Lapack does not support integers so must cast.
1057 // TODO write implementation for integers
1058 RealVector dbl_pivots( M, false );
1059 for ( int n = 0; n < dbl_pivots.length(); n++ )
1060 dbl_pivots[n] = (double)n;
1061
1062 // convert pivots to a format that supports slicing (e.g python) to swap rows
1063 RealVector row_pivots;
1064 pivot_matrix_rows( dbl_pivots, la_pivots, 1, true, row_pivots );
1065
1066 pivots.sizeUninitialized( K );
1067 for ( int n = 0; n < K; n++ )
1068 pivots[n] = (int)row_pivots[n];
1069
1070 }
1071
1072
pivoted_cholesky_factorization(RealMatrix & A,RealMatrix & L,IntVector & p,int & rank,double tol)1073 void pivoted_cholesky_factorization( RealMatrix &A, RealMatrix &L, IntVector &p,
1074 int &rank, double tol )
1075 {
1076 Teuchos::LAPACK<int, Real> la;
1077
1078 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1079 int N( A.numRows() );
1080
1081 L.shape( N, N );
1082 p.size( N ); // must be initialized to 0
1083
1084 //------------------------------------//
1085 // Compute the cholesky factorization //
1086 //------------------------------------//
1087
1088 Real *work; // Teuchos::LAPACK work array
1089 int info; // Teuchos::LAPACK output flag
1090 int lda = std::max( 1, A_copy.stride() );
1091 rank = 0;
1092
1093 work = new Real [2*N]; // temporary work array
1094
1095 //Teuchos::EUplo uplo
1096 //Teuchos::EUploChar[uplo]
1097 char uplo = 'L';
1098 DPSTRF_F77( &uplo, &N, A_copy.values(),
1099 &lda, p.values(), &rank, &tol, work, &info );
1100 delete [] work;
1101
1102 if ( info < 0 )
1103 {
1104 std::stringstream msg;
1105 msg << "privoted_cholesky_factorization() dpstrf failed. ";
1106 msg << "The " << std::abs( info ) << "-th argument had an ";
1107 msg << "illegal value";
1108 throw( std::runtime_error( msg.str() ) );
1109 }
1110 else if ( info > 0 )
1111 {
1112 std::stringstream msg;
1113 msg << "privoted_cholesky_factorization() dpstrf failed. ";
1114 msg << "The matrix A is either rank deficient with computed rank " << rank
1115 << " , or is indefinite. See Section 7 of "
1116 << "LAPACK Working Note #161 for further information.\n";
1117 //std::cout << msg.str();
1118 }
1119
1120 // Fill upper triangular part of cholesky factor
1121 for ( int m = 0; m < N; m++ )
1122 {
1123 for ( int n = 0; n < m+1; n++ )
1124 L(m,n) = A_copy(m,n);
1125 }
1126
1127 // fortran returns indices 1,...,N
1128 // c++ requires 0,...,N-1
1129 for ( int n = 0; n < N; n++ )
1130 p[n]--;
1131 }
1132
cholesky_condition_number(RealMatrix & L)1133 Real cholesky_condition_number( RealMatrix &L )
1134 {
1135 Teuchos::LAPACK<int, Real> la;
1136 int m( L.numRows() );
1137 Real *work = new Real [3*m]; // workspace array
1138 int *iwork = new int [m]; // workspace array
1139 Real rcond = 0.;
1140 int info;
1141 la.POCON( Teuchos::EUploChar[Teuchos::LOWER_TRI], m, L.values(),
1142 L.stride(), L.normOne(), &rcond, work, iwork, &info );
1143 delete [] work;
1144 delete [] iwork;
1145 if ( info < 0 )
1146 {
1147 std::stringstream msg;
1148 msg << "cholesky_condition_number() Incorrect arguments specified to "
1149 << "POCON()\n";
1150 throw( std::runtime_error( msg.str() ) );
1151 }
1152 return rcond;
1153 }
1154
symmetric_eigenvalue_decomposition(const RealMatrix & A,RealVector & eigenvalues,RealMatrix & eigenvectors)1155 void symmetric_eigenvalue_decomposition( const RealMatrix &A,
1156 RealVector &eigenvalues,
1157 RealMatrix &eigenvectors )
1158 {
1159 Teuchos::LAPACK<int, Real> la;
1160
1161 int N( A.numRows() );
1162 eigenvectors.shapeUninitialized( N, N );
1163 eigenvectors.assign( A );
1164
1165 char jobz = 'V'; // compute eigenvectors
1166
1167 char uplo = 'U'; // assume only upper triangular part of matrix is stored
1168
1169 eigenvalues.sizeUninitialized( N );
1170
1171 int info; // Teuchos::LAPACK output flag
1172 RealVector work; // Teuchos::LAPACK work array;
1173 int lwork = -1; // Size of Teuchos::LAPACK work array
1174
1175 // Compute optimal size of work array
1176 work.sizeUninitialized( 1 ); // temporary work array
1177 la.SYEV( jobz, uplo, N, eigenvectors.values(), eigenvectors.stride(),
1178 eigenvalues.values(), work.values(), lwork, &info );
1179
1180 lwork = (int)work[0];
1181 work.sizeUninitialized( lwork );
1182
1183 la.SYEV( jobz, uplo, N, eigenvectors.values(), eigenvectors.stride(),
1184 eigenvalues.values(), work.values(), lwork, &info );
1185
1186 if ( info > 0 )
1187 {
1188 std::stringstream msg;
1189 msg << "The algorithm failed to converge." << info
1190 << " off-diagonal elements of an intermediate tridiagonal "
1191 << "form did not converge to zero.";
1192 throw( std::runtime_error( msg.str() ) );
1193 }
1194 else if ( info < 0 )
1195 {
1196 std::stringstream msg;
1197 msg << " The " << std::abs( info ) << " argument had an illegal value.";
1198 throw( std::runtime_error( msg.str() ) );
1199 }
1200 };
1201
complete_pivoted_lu_factorization(const RealMatrix & A,RealMatrix & L_factor,RealMatrix & U_factor,IntVector & row_pivots,IntVector & column_pivots,int max_iters)1202 void complete_pivoted_lu_factorization( const RealMatrix &A,
1203 RealMatrix &L_factor,
1204 RealMatrix &U_factor,
1205 IntVector &row_pivots,
1206 IntVector &column_pivots,
1207 int max_iters )
1208 {
1209 int num_rows = A.numRows(), num_cols = A.numCols();
1210 max_iters = std::min( max_iters, std::min( num_rows, num_cols ) );
1211
1212 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1213 row_pivots.sizeUninitialized( num_rows );
1214 for ( int i = 0; i < num_rows; i++ )
1215 row_pivots[i] = i;
1216 column_pivots.sizeUninitialized( num_cols );
1217 for ( int j = 0; j < num_cols; j++ )
1218 column_pivots[j] = j;
1219
1220 RealVector max_col_values( num_cols, false );
1221 IntVector argmax_col_values( num_cols, false );
1222 RealVector temp_row( num_cols, false );
1223 RealVector temp_col( num_rows, false );
1224 for ( int k = 0; k < std::min( num_rows-1, num_cols ); k++ )
1225 {
1226 Real max_pivot = -1.;
1227 int pivot_column = k, pivot_row = k;
1228 max_col_values = -1.;
1229 for ( int j = k; j < num_cols; j++ )
1230 {
1231 for ( int i = k; i < num_rows; i++ )
1232 {
1233 Real pivot = std::abs( A_copy(i,j) );
1234 if ( pivot > max_col_values[j-k] )
1235 {
1236 max_col_values[j-k] = pivot;
1237 argmax_col_values[j-k] = i-k;
1238 if ( pivot > max_pivot )
1239 {
1240 max_pivot = pivot;
1241 pivot_column = j-k;
1242 }
1243 }
1244 }
1245 }
1246 pivot_row = argmax_col_values[pivot_column]+k;
1247 pivot_column += k;
1248
1249 // update pivot vectors
1250 int temp_index = row_pivots[k];
1251 row_pivots[k] = row_pivots[pivot_row];
1252 row_pivots[pivot_row] = temp_index;
1253 temp_index = column_pivots[k];
1254 column_pivots[k] = column_pivots[pivot_column];
1255 column_pivots[pivot_column] = temp_index;
1256
1257 // swap rows
1258 for ( int j = 0; j < num_cols; j++ )
1259 {
1260 Real temp_value = A_copy(k,j);
1261 A_copy(k,j) = A_copy(pivot_row,j);
1262 A_copy(pivot_row,j) = temp_value;
1263 }
1264
1265 // swap columns
1266 for ( int i = 0; i < num_rows; i++ )
1267 {
1268 Real temp_value = A_copy(i,k);
1269 A_copy(i,k) = A_copy(i,pivot_column);
1270 A_copy(i,pivot_column) = temp_value;
1271 }
1272
1273 // check for singularity
1274 if ( std::abs( A_copy(k,k) ) < 1e-10 )
1275 {
1276 std::cout << "pivot " << std::abs( A_copy(k,k) ) << " is to small. "
1277 << "Stopping factorization.\n";
1278 break;
1279 }
1280
1281 // update LU factoization
1282 for ( int i = k+1; i < num_rows; i++ )
1283 A_copy(i,k) /= A_copy(k,k);
1284
1285 RealMatrix sub_matrix( Teuchos::View, A_copy, num_rows-k-1, num_cols-k-1,
1286 k+1, k+1 );
1287 RealMatrix col_vector( Teuchos::View, A_copy, num_rows-k-1, 1, k+1, k );
1288 RealMatrix row_vector( Teuchos::View, A_copy, 1, num_cols-k-1, k, k+1 );
1289
1290 sub_matrix.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
1291 -1.0, col_vector, row_vector, 1.0 );
1292
1293 if ( k >= max_iters )
1294 break;
1295 }
1296
1297 // final column swap if num_cols > num_rows
1298 if ( num_cols > num_rows && num_cols >= max_iters )
1299 {
1300 // determine pivot which is the maximum entry in A_copy
1301 Real max_entry = 0.;
1302 int pivot_column = num_rows-1;
1303 for ( int j = num_rows-1; j < num_cols; j++ )
1304 {
1305 Real pivot = std::abs( A_copy(num_rows-1,j) );
1306 if ( pivot >= max_entry )
1307 {
1308 max_entry = pivot;
1309 pivot_column = j-(num_rows-1);
1310 }
1311 }
1312 pivot_column += num_rows-1;
1313
1314
1315 // update pivot column vector
1316 int temp_index = column_pivots[num_rows-1];
1317 column_pivots[num_rows-1] = column_pivots[pivot_column];
1318 column_pivots[pivot_column] = temp_index;
1319
1320 // swap column
1321 for ( int i = 0; i < num_rows; i++ )
1322 {
1323 Real temp_value = A_copy(i,num_rows-1);
1324 A_copy(i,num_rows-1) = A_copy(i,pivot_column);
1325 A_copy(i,pivot_column) = temp_value;
1326 }
1327 }
1328
1329 // Build L and U matrices
1330 int max_num_rows_cols = std::min( num_rows, num_cols );
1331 L_factor.shape( num_rows, max_num_rows_cols );
1332 U_factor.shape( max_num_rows_cols, num_cols );
1333 for ( int j = 0; j < max_num_rows_cols; j++ )
1334 {
1335 L_factor(j,j) = 1.;
1336 for ( int i = j+1; i < num_rows; i++ )
1337 L_factor(i,j) = A_copy(i,j);
1338 }
1339
1340 for ( int j = 0; j < num_cols; j++ )
1341 {
1342 for ( int i = 0; i < std::min( num_rows, j+1 ); i++ )
1343 U_factor(i,j) = A_copy(i,j);
1344 }
1345 };
1346
1347
1348 /*
1349 Does not seem to work
1350 void complete_pivoted_lu_factorization_lapack( RealMatrix &A,
1351 RealMatrix &L_factor,
1352 RealMatrix &U_factor,
1353 IntVector &row_pivots,
1354 IntVector &column_pivots )
1355 {
1356 RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1357 int M( A.numRows() ), N( A.numCols() );
1358
1359 if ( M != N )
1360 throw( std::runtime_error("complete_pivoted_lu_factorization: matrix must be square" ) );
1361
1362 row_pivots.sizeUninitialized( N );
1363 column_pivots.sizeUninitialized( N );
1364 row_pivots = -2;
1365 // dgetc2_ does not update pivots correctly seems to exit early.
1366
1367 int lda = A_copy.stride(), info;
1368 disp( N );
1369 disp( lda );
1370 DGETC2_F77( &N, A_copy.values(), &lda, row_pivots.values(),
1371 column_pivots.values(), &info );
1372
1373 disp( info );
1374 disp( row_pivots );
1375 if ( info > 0 )
1376 {
1377 std::stringstream msg;
1378 msg << "U(" << info-1 << "," << info-1 << ") is likely to produce " <<
1379 "overflow if we try to solve for x in Ax=b. So U is peturbed to avoid "
1380 "overflow.\n";
1381 std::cout << msg.str();
1382 }
1383
1384 L_factor.shape( N, N );
1385 U_factor.shape( N, N );
1386
1387 // Fill U
1388 for ( int n = 0; n < N; n++ )
1389 {
1390 for ( int m = 0; m < n; m++ )
1391 U_factor(m,n) = A_copy(m,n);
1392 }
1393
1394 // Fill L
1395 for ( int n = 0; n < N; n++ )
1396 {
1397 L_factor(n,n) = 1.;
1398 for ( int m = n+1; m < N; m++ )
1399 L_factor(m,n) = A_copy(m,n);
1400 }
1401
1402 // fortran returns indices 1,...,N
1403 // c++ requires 0,...,N-1
1404 for ( int n = 0; n < N; n++ )
1405 {
1406 row_pivots[n]--;
1407 column_pivots[n]--;
1408 }
1409 }
1410 */
1411
pivot_matrix_rows(const RealMatrix & A,const IntVector & pivots,int dir,bool fortran_indexing,RealMatrix & result)1412 void pivot_matrix_rows( const RealMatrix &A, const IntVector &pivots, int dir,
1413 bool fortran_indexing, RealMatrix &result )
1414 {
1415 result.shapeUninitialized( A.numRows(), A.numCols() );
1416 result.assign( A );
1417 // Add 1 to pivots because we are using fotran indexing
1418 IntVector pivots_copy( pivots.length(), false );
1419 int shift = 1;
1420 if ( fortran_indexing ) shift = 0;
1421 for ( int i = 0; i < pivots.length(); i++ )
1422 pivots_copy[i] = pivots[i]+shift;
1423 // if dir is negative pivots are applied in reverse order
1424 int N = result.numCols(), LDA = result.stride(), K1 = 1,
1425 K2 = pivots_copy.numRows(), INCX = dir;
1426 DLASWP_F77( &N, result.values(), &LDA, &K1, &K2, pivots_copy.values(), &INCX );
1427 }
1428
1429
truncated_pivoted_lu_factorization(const RealMatrix & A,RealMatrix & L_factor,RealMatrix & U_factor,IntVector & pivots,int max_iters,int num_initial_rows)1430 void truncated_pivoted_lu_factorization( const RealMatrix &A,
1431 RealMatrix &L_factor,
1432 RealMatrix &U_factor,
1433 IntVector &pivots,
1434 int max_iters,
1435 int num_initial_rows )
1436 {
1437
1438 Teuchos::BLAS<int, Real> blas;
1439 int num_rows = A.numRows(), num_cols = A.numCols();
1440 int min_num_rows_cols = std::min( num_rows, num_cols );
1441 max_iters = std::min( max_iters, num_rows );
1442 if ( A.numCols() < max_iters ){
1443 std::string msg = "truncated_pivoted_lu_factorization: ";
1444 msg += " A is inconsistent with max_iters. Try deceasing max_iters or ";
1445 msg += " increasing the number of columns of A";
1446 throw(std::runtime_error( msg ) );
1447 }
1448
1449 // Use L to store both L and U during factoriation then copy out U in post
1450 // proccesing
1451 L_factor.shapeUninitialized( num_rows, min_num_rows_cols );
1452 L_factor.assign( A );
1453 pivots.sizeUninitialized( num_rows );
1454 for ( int i = 0; i < num_rows; i++ )
1455 pivots[i] = i;
1456
1457 int iter = 0;
1458 for ( iter = 0; iter < min_num_rows_cols; iter++ )
1459 {
1460 // find best pivot
1461 int pivot =-1;
1462 if ( iter < num_initial_rows ){
1463 pivot = blas.IAMAX( num_initial_rows-iter, &(L_factor[iter][iter]), 1 )
1464 -1+iter;
1465 }else{
1466 pivot = blas.IAMAX( num_rows-iter, &(L_factor[iter][iter]), 1 )-1+iter;
1467 }
1468
1469 // update pivots vector
1470 int temp_index = pivots[iter];
1471 pivots[iter] = pivots[pivot];
1472 pivots[pivot] = temp_index;
1473 // apply pivots(swap rows) in L factorization
1474 for ( int j = 0; j < num_cols; j++ ){
1475 Real temp_value = L_factor(iter,j);
1476 L_factor(iter,j) = L_factor(pivot,j);
1477 L_factor(pivot,j) = temp_value;
1478 }
1479
1480 // check for singularity
1481 if ( std::abs(L_factor(iter,iter))<std::numeric_limits<double>::epsilon()){
1482 std::cout << "pivot " << std::abs( L_factor(iter,iter) )
1483 << " is to small. Stopping factorization.\n";
1484 break;
1485 }
1486 // update L_factor factoization
1487 for ( int i = iter+1; i < num_rows; i++ )
1488 L_factor(i,iter) /= L_factor(iter,iter);
1489
1490 RealMatrix sub_matrix( Teuchos::View, L_factor, num_rows-iter-1,
1491 num_cols-iter-1,
1492 iter+1, iter+1 );
1493 RealMatrix col_vector( Teuchos::View, L_factor, num_rows-iter-1, 1,
1494 iter+1, iter );
1495 RealMatrix row_vector( Teuchos::View, L_factor, 1, num_cols-iter-1, iter,
1496 iter+1 );
1497
1498 sub_matrix.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
1499 -1.0, col_vector, row_vector, 1.0 );
1500 if ( iter >= max_iters-1 )
1501 break;
1502 }
1503
1504 // build L and U matrices
1505
1506 // extract entries of U from L
1507 U_factor.shape( min_num_rows_cols, num_cols );
1508 for ( int j = 0; j < num_cols; j++ )
1509 {
1510 for ( int i = 0; i < std::min( min_num_rows_cols, j+1 ); i++ )
1511 U_factor(i,j) = L_factor(i,j);
1512 }
1513
1514 // zero out U entries in L
1515 L_factor.reshape( iter+1, min_num_rows_cols );
1516 for ( int j = 0; j < min_num_rows_cols; j++ )
1517 {
1518 if ( j < iter+1 ) L_factor(j,j) = 1.;
1519 for ( int i = 0; i<std::min(j,iter+1); i++ )
1520 L_factor(i,j) = 0.;
1521 }
1522 // remove unused pivot entries
1523 pivots.resize( iter+1 );
1524 }
1525
1526 } // namespace util
1527 } // namespace Pecos
1528