1 /*  _______________________________________________________________________
2 
3     PECOS: Parallel Environment for Creation Of Stochastics
4     Copyright (c) 2011, Sandia National Laboratories.
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Pecos directory.
7     _______________________________________________________________________ */
8 
9 #include "linear_algebra.hpp"
10 #include "Teuchos_BLAS_wrappers.hpp"
11 #include "Teuchos_LAPACK_wrappers.hpp"
12 #include "Teuchos_SerialSpdDenseSolver.hpp"
13 #include "teuchos_data_types.hpp"
14 #define disp( a ) std::cout << #a << ": " << ( a ) << std::endl
15 
16 // DLASWP and DGEQP3 are in Teuchos_LAPACK_wrappers.hpp
17 #define DPSTRF_F77 F77_BLAS_MANGLE(dpstrf,DPSTRF)
18 // unused:
19 // #define DGETC2_F77 F77_BLAS_MANGLE(dgetc2,DGETC2)
20 // #define DORGRQ_F77 F77_BLAS_MANGLE(dorgrq,DORGRQ)
21 extern "C"
22 {
23   void DPSTRF_F77( const char* UPLO, int *N, double *A,
24 		   int *LDA, int *PIV, int *RANK, double *TOL, double *WORK,
25 		   int *INFO );
26 
27   // void DGETC2_F77( int *N, double *A, int *LDA, int *IPIV, int *JPIV, int *INFO );
28 
29   // void DORGRQ_F77( int *M, int *N, int *K, double *A, int* LDA, double* TAU,
30   // 		   double *WORK, const int *LWORK, int *INFO );
31 }
32 
33 namespace Pecos {
34 namespace util {
35 
GEMV(Teuchos::ETransp trans,bool conjugate_trans,double alpha,const Teuchos::SerialDenseMatrix<int,double> & matrix,const Teuchos::SerialDenseVector<int,double> & vector,double beta,Teuchos::SerialDenseVector<int,double> & result)36 template<> void GEMV<int,double>(
37       Teuchos::ETransp trans, bool conjugate_trans, double alpha,
38       const Teuchos::SerialDenseMatrix<int,double> &matrix,
39       const Teuchos::SerialDenseVector<int,double> &vector,
40       double beta,
41       Teuchos::SerialDenseVector<int,double> &result){
42 
43   int result_len = matrix.numRows();
44   if ((trans==Teuchos::TRANS) || (trans==Teuchos::CONJ_TRANS))
45     result_len = matrix.numCols();
46 
47   if (result.length() != result_len){
48     if (beta!=0.)
49       throw(std::runtime_error("result inconsistent with matrix but beta!=0"));
50     result.sizeUninitialized(result_len);
51   }
52 
53   int inc=1;
54   char T=Teuchos::ETranspChar[trans];
55   int M=matrix.numRows(), N=matrix.numCols(), stride=matrix.stride();
56   DGEMV_F77( &T, &M, &N, &alpha,
57 	     matrix.values(), &stride,
58 	     vector.values(), &inc, &beta, result.values(), &inc );
59 }
60 
61 // BMA: Disabled the complex variant since no current use cases
62 // When reactivated will need to edit Dakota and Pecos CMakeLists.txt
63 //#ifdef HAVE_COMPLEX_BLAS
64 //template<> void GEMV<int,std::complex<double> >(
65 //      Teuchos::ETransp trans, bool conjugate_trans, std::complex<double> alpha,
66 //      const Teuchos::SerialDenseMatrix<int,std::complex<double> >&matrix,
67 //      const Teuchos::SerialDenseVector<int,std::complex<double> >&vector,
68 //      std::complex<double> beta,
69 //      Teuchos::SerialDenseVector<int,std::complex<double> > &result){
70 //
71 //  int result_len = matrix.numRows();
72 //  if ((trans==Teuchos::TRANS) || (trans==Teuchos::CONJ_TRANS))
73 //    result_len = matrix.numCols();
74 //
75 //  if (result.length() != result_len){
76 //    if (beta!=0.)
77 //      throw(std::runtime_error("result inconsistent with matrix but beta!=0"));
78 //    result.sizeUninitialized(result_len);
79 //  }
80 //
81 //  if ((trans==Teuchos::TRANS) && conjugate_trans)
82 //    trans = Teuchos::CONJ_TRANS;
83 //
84 //  int inc=1;
85 //  char T=Teuchos::ETranspChar[trans];
86 //  int M=matrix.numRows(), N=matrix.numCols(), stride=matrix.stride();
87 //  ZGEMV_F77( &T, &M, &N, &alpha,
88 //	     matrix.values(), &stride,
89 //	     vector.values(), &inc, &beta, result.values(), &inc );
90 //}
91 //#endif
92 
cholesky(const RealMatrix & A,RealMatrix & result,Teuchos::EUplo uplo,bool for_lapack)93 int cholesky( const RealMatrix &A, RealMatrix &result, Teuchos::EUplo uplo,
94               bool for_lapack )
95 {
96   Teuchos::LAPACK<int, Real> la;
97   int M = A.numRows();
98   result.reshape( M, M );
99   result.assign( A );
100 
101   int info;
102   la.POTRF( Teuchos::EUploChar[uplo], M, result.values(), result.stride(),
103 	    &info );
104 
105   if ( info > 0 )
106     {
107       //std::cout << "cholesky() The matrix A is not positive definite\n";
108       return info;
109     }
110   if ( info < 0 )
111     {
112       std::stringstream msg;
113       msg << "cholesky() POTRF failed\n";
114       msg << "The " << std::abs( info ) << "-th argument had an ";
115       msg << "illegal value";
116       throw( std::runtime_error( msg.str() ) );
117       throw( std::runtime_error( msg.str() ) );
118     }
119 
120   // lapack returns the lower/upper triangle of the (symmetric) inverse of A
121   // so we must use symmetry to fill in the entries above/below the diagonal
122   if ( !for_lapack )
123     {
124       if ( uplo == Teuchos::LOWER_TRI )
125 	{
126 	  for ( int j = 1; j < M; j++ )
127 	    for ( int i = 0; i < j; i++ )
128 	      result(i,j) = 0.;
129 	}
130       else
131 	{
132 	  for ( int j = 1; j < M; j++ )
133 	    for ( int i = 0; i < j; i++ )
134 	      result(j,i) = 0.;
135 	}
136     }
137 
138   return info;
139 };
140 
solve_using_cholesky_factor(const RealMatrix & L,const RealMatrix & B,RealMatrix & result,Teuchos::EUplo uplo)141 int solve_using_cholesky_factor( const RealMatrix &L, const RealMatrix& B,
142                                  RealMatrix& result, Teuchos::EUplo uplo )
143 {
144   Teuchos::LAPACK<int, Real> la;
145   int m( L.numRows() ), num_rhs( B.numCols() );
146   result.reshape( B.numRows(), num_rhs );
147   result.assign( B );
148 
149   // Solves the system of linear equations A*X = B with a symmetric
150   // positive definite matrix A=LL' using the Cholesky factorization
151   int info;
152   la.POTRS( Teuchos::EUploChar[uplo], m, num_rhs,
153 	    L.values(), L.stride(),
154 	    result.values(), result.stride(), &info );
155   result.reshape( L.numRows(), num_rhs );
156 
157   return info;
158 };
159 
teuchos_cholesky_solve(RealMatrix & A,RealMatrix & B,RealMatrix & result,Real & rcond)160 int teuchos_cholesky_solve( RealMatrix &A, RealMatrix &B,
161                             RealMatrix& result, Real &rcond ){
162   // Copy A so that it is not affected outside the scope of this function
163   RealSymMatrix chol_factor( A.numRows() );
164   for (int j=0; j<A.numRows(); j++)
165     {
166       chol_factor(j,j) = A(j,j);
167       for (int i=j+1; i<A.numRows();i++)
168 	{
169 	  chol_factor(i,j) = A(i,j);
170 	  chol_factor(j,i) = A(i,j);
171 	}
172     }
173   // Copy B so that it is not affected outside the scope of this function
174   RealMatrix rhs( Teuchos::Copy, B, B.numRows(), B.numCols() );
175 
176   result.shapeUninitialized( rhs.numRows(), rhs.numCols() );
177   Teuchos::SerialSpdDenseSolver<int, Real> solver;
178   solver.setMatrix( rcp(&chol_factor, false) );
179   solver.setVectors( rcp(&result, false), rcp(&rhs, false) );
180   solver.factorWithEquilibration( true );
181   int info(0);
182   info = solver.factor();
183   //solver.solveToRefinedSolution( true );
184   info = solver.solve();
185   info = solver.reciprocalConditionEstimate( rcond );
186   return info;
187 }
188 
cholesky_solve(const RealMatrix & A,const RealMatrix & B,RealMatrix & result,Real & rcond)189 int cholesky_solve(const RealMatrix& A, const RealMatrix& B,
190                    RealMatrix& result, Real &rcond)
191 {
192   Teuchos::LAPACK<int, Real> la;
193 
194   int m( A.numRows() );
195   RealMatrix L;
196   int info = cholesky( A, L, Teuchos::LOWER_TRI, true );
197 
198   if ( info != 0 ) return info;
199 
200   // Compute the reciprocal of the condition number of A = L*L**T or A = U**T*U
201   // from its cholesky decompostion computed by POTRF.
202   if ( rcond < 0 )
203     {
204       Real *work = new Real [3*m]; // workspace array
205       int *iwork = new int [m];  // workspace array
206       la.POCON( Teuchos::EUploChar[Teuchos::LOWER_TRI], m, L.values(),
207 		L.stride(), A.normOne(), &rcond, work, iwork, &info );
208       delete [] work;
209       delete [] iwork;
210       if ( info < 0 )
211 	{
212 	  std::cout << "cholesky_solve() Incorrect arguments specified to ";
213 	  std::cout << "POCON()\n";
214 	  return info;
215 	}
216     }
217 
218   info = solve_using_cholesky_factor( L, B, result, Teuchos::LOWER_TRI );
219 
220   return info;
221 };
222 
qr_factorization(const RealMatrix & A,RealMatrix & Qfactor,RealMatrix & Rfactor)223 void qr_factorization( const RealMatrix &A, RealMatrix &Qfactor,
224 		       RealMatrix &Rfactor ){
225   Teuchos::LAPACK<int, Real> la;
226   int M( A.numRows() ), N( A.numCols() );
227   int K( std::min(M,N) );
228   RealMatrix qr_data( Teuchos::Copy, A, M, N );
229   /*RealMatrix qr_data( M, std::max( M, N ), false );
230   // Copy A into qr_data but if overdetermined allocate enough memory
231   // to store Q when passed to dorgrq.
232   for (int j=0; j<N; j++)
233     for (int i=0; i<M; i++)
234     qr_data(i,j) = A(i,j);*/
235 
236   //qr_data.shapeUninitialized( M, N );
237   //qr_data.assign( A );
238 
239   //---------------------------------//
240   // Get the optimal work array size //
241   //---------------------------------//
242 
243   int lwork;     // Size of Teuchos::LAPACK work array
244   Real *work;  // Teuchos::LAPACK work array
245   int info;      // Teuchos::LAPACK output flag
246   lwork = -1;             // special code for workspace query
247   work  = new Real [1]; // temporary work array
248   //tau.sizeUninitialized( K ) ;// scalar factors of reflectors
249   RealVector tau( K, false );
250   la.GEQRF( M, N, qr_data.values(), qr_data.stride(), tau.values(),
251 	    work, lwork, &info );
252   lwork = (int)work[0];  // optimal work array size returned by query
253   delete [] work;
254 
255   //------------------------------//
256   // Compute the QR factorization //
257   //------------------------------//
258   work  = new Real [lwork]; // Optimal work array
259   la.GEQRF( M, N, qr_data.values(), qr_data.stride(), tau.values(),
260 	    work, lwork, &info );
261   delete [] work;
262 
263   //-------------------//
264   // Form the R factor //
265   //-------------------//
266 
267   Rfactor.shape( M, N ); // initialize to zero
268   for (int j=0; j<N; j++)
269     for (int i=0; i<=std::min(j,M-1); i++)
270       Rfactor(i,j) = qr_data(i,j);
271 
272   //-------------------//
273   // Form the Q factor //
274   //-------------------//
275 
276   /*No economoy version.
277     "economy size" R. If m>n, R has only n rows.
278     If m<=n, this is the same as R = qr(A).
279 
280   // Note ORGQR give the equivalent of Q from qr(econ) in matlab
281   // that is only the first N columns.
282   // But I want all M columns so I use ORMQR to multiply the MxM identity by Q
283   Qfactor.shape( M, M );; // initialize to zero
284   for (int i=0; i<M; i++)
285     Qfactor(i,i) = 1.;
286 
287   // query optimal work size
288   lwork = -1;             // special code for workspace query
289   work  = new Real [1]; // temporary work array
290   int LDA = qr_data.stride();
291   la.ORMQR( 'L', 'N', M, M, K, qr_data.values(), LDA,
292 	    tau.values(), Qfactor.values(), Qfactor.stride(),
293 	    work, lwork, &info );
294   lwork = (int)work[0];  // optimal work array size returned by query
295   delete [] work;
296 
297   work  = new Real [lwork]; // Optimal work array
298   la.ORMQR( 'L', 'N', M, M, K, qr_data.values(), LDA,
299 	    tau.values(), Qfactor.values(), Qfactor.stride(),
300 	    work, lwork, &info );
301   delete [] work;
302 */
303 
304   // NESTA needs economy version
305   Qfactor.shape( M, N );; // initialize to zero
306   for (int i=0; i<M; i++)
307     Qfactor(i,i) = 1.;
308 
309   // query optimal work size
310   lwork = -1;             // special code for workspace query
311   work  = new Real [1]; // temporary work array
312   la.ORGQR( M, N, tau.length(), qr_data.values(), qr_data.stride(),
313 	    tau.values(), work, lwork, &info );
314   lwork = (int)work[0];  // optimal work array size returned by query
315   delete [] work;
316 
317   work  = new Real [lwork]; // Optimal work array
318   la.ORGQR( M, N, tau.length(), qr_data.values(), qr_data.stride(),
319 	    tau.values(), work, lwork, &info );
320   delete [] work;
321 
322   Qfactor.shapeUninitialized( M, N );
323   for (int j=0; j<N; j++)
324     for (int i=0; i<M; i++)
325       Qfactor(i,j) = qr_data(i,j);
326 
327 
328 
329 }
330 
331 /*void qr_solve( const RealMatrix &B, const RealMatrix &qr_data,
332 	       const RealVector &tau, RealMatrix &result ){
333   Teuchos::LAPACK<int, Real> la;
334   int M( B.numRows() ), num_rhs( B.numCols() );
335   int K = tau.length();
336   RealMatrix QtB( Teuchos::Copy, B, M, num_rhs );
337 
338   // query optimal work size
339   int info ;
340   int lwork = -1;             // special code for workspace query
341   Real *work  = new Real [1]; // temporary work array
342   la.ORMQR( 'L', 'T', M, num_rhs, K, qr_data.values(),
343 	    qr_data.stride(), tau.values(), QtB.values(), QtB.stride(),
344 	    work, lwork, &info );
345   lwork = (int)work[0];  // optimal work array size returned by query
346   delete [] work;
347   work  = new Real [lwork]; // Optimal work array
348   la.ORMQR( 'L', 'T', M, num_rhs, K, qr_data.values(),
349 	    qr_data.stride(), tau.values(), QtB.values(), QtB.stride(),
350 	    work, lwork, &info );
351 
352   substitution_solve( qr_data, QtB, result, Teuchos::NO_TRANS,
353 		      Teuchos::UPPER_TRI, Teuchos::NON_UNIT_DIAG );
354   delete [] work;
355 		      }*/
356 
qr_solve(const RealMatrix & A,const RealMatrix & B,RealMatrix & result,Teuchos::ETransp trans)357 void qr_solve( const RealMatrix &A, const RealMatrix &B, RealMatrix &result,
358 	       Teuchos::ETransp trans )
359 {
360   Teuchos::LAPACK<int, Real> la;
361 
362   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
363   int M( A.numRows() ), N( A.numCols() ), num_rhs( B.numCols() );
364   RealMatrix B_copy( Teuchos::Copy, B, B.numRows(), B.numCols() );;
365   B_copy.reshape( std::max( M, N ), num_rhs );
366   //---------------------------------//
367   // Get the optimal work array size //
368   //---------------------------------//
369 
370   int lwork;     // Size of Teuchos::LAPACK work array
371   Real *work;  // Teuchos::LAPACK work array
372   int info=0;      // Teuchos::LAPACK output flag
373   int lda = A_copy.stride();
374   int ldb = B_copy.stride();
375 
376   lwork = -1;             // special code for workspace query
377   work  = new Real [1]; // temporary work array
378   la.GELS( Teuchos::ETranspChar[trans], M, N, num_rhs, A_copy.values(),
379 	   lda, B_copy.values(), ldb, work, lwork, &info );
380   lwork = (int)work[0];  // optimal work array size returned by query
381   delete [] work;
382   work  = new Real [lwork]; // Optimal work array
383 
384   //---------------------------------//
385   // Solve Ax = b                    //
386   //---------------------------------//
387   la.GELS( Teuchos::ETranspChar[trans], M, N, num_rhs, A_copy.values(), lda,
388 	   B_copy.values(), ldb, work, lwork, &info );
389   if ( info < 0 )
390     {
391       std::stringstream msg;
392       msg << "qr_solve() dgels failed. ";
393       msg << "The " << std::abs( info ) << "-th argument had an ";
394       msg << "illegal value";
395       throw( std::runtime_error( msg.str() ) );
396     }
397   if ( info > 0 )
398     {
399       std::stringstream msg;
400       msg << "QR Solve dgels failed. ";
401       msg << "The " << info << "-th diagonal element of the ";
402       msg << "triangular factor of A is zero, so that A does not have";
403       msg << "full rank; the least squares solution could not be computed.";
404       throw( std::runtime_error( msg.str() ) );
405     }
406   delete [] work;
407   result.reshape( N, num_rhs );
408   for ( int j = 0; j < num_rhs; j++ )
409     for ( int i = 0; i < N; i++ )
410       result(i,j) = B_copy(i,j);
411 };
412 
413 
svd_solve(const RealMatrix & A,const RealMatrix & B,RealMatrix & result_0,RealVector & result_1,int & rank,Real rcond)414 void svd_solve( const RealMatrix &A, const RealMatrix &B, RealMatrix &result_0,
415 		RealVector &result_1, int &rank, Real rcond )
416 {
417   Teuchos::LAPACK<int, Real> la;
418 
419   //-----------------//
420   // Allocate memory //
421   //-----------------//
422 
423   int M( A.numRows() ),  N( A.numCols() ), num_rhs( B.numCols() );
424 
425   if (num_rhs<1)
426     throw(std::runtime_error("B has zero columns"));
427 
428   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
429   result_1.sizeUninitialized( std::min( M, N ) );
430 
431   //---------------------------------//
432   // Get the optimal work array size //
433   //---------------------------------//
434 
435   int lwork;     // Size of Teuchos::LAPACK work array
436   Real *work;    // Teuchos::LAPACK work array
437   int info;      // Teuchos::LAPACK output flag
438   int lda = A_copy.stride();
439   int ldb = std::max( std::max( B.stride(), lda ), N );
440 
441   result_0.shapeUninitialized( M, num_rhs );
442   result_0.assign( B );
443   result_0.reshape( ldb, num_rhs );
444 
445   lwork = -1;             // special code for workspace query
446   work  = new Real [1]; // temporary work array
447   la.GELSS( M, N, num_rhs, A_copy.values(), lda, result_0.values(), ldb,
448 	    result_1.values(), rcond, &rank, work, lwork, &info );
449   lwork = (int)work[0];  // optimal work array size returned by query
450 
451   delete [] work;
452   work  = new Real [lwork]; // Optimal work array
453 
454   //---------------------------------//
455   // Solve Ax = b                    //
456   //---------------------------------//
457   la.GELSS( M, N, num_rhs, A_copy.values(), lda, result_0.values(), ldb,
458 	    result_1.values(), rcond, &rank, work, lwork, &info );
459 
460   result_0.reshape( N, num_rhs );
461   delete [] work;
462 };
463 
cholesky_factorization_update_insert_column(RealMatrix & A,RealMatrix & U,RealMatrix & col,int iter,Real delta)464 int cholesky_factorization_update_insert_column( RealMatrix &A, RealMatrix &U,
465 						 RealMatrix &col, int iter,
466 						 Real delta )
467 {
468   int info( 0 );
469   Real col_norm = col.normFrobenius();
470   if ( iter == 0 )
471     {
472       U(0,0) = std::sqrt( col_norm * col_norm + delta );
473     }
474   else
475     {
476       if ( iter >= U.numRows() )
477 	throw( std::runtime_error("cholesky_factorization_update_insert_column: iter out of bounds") );
478 
479       RealMatrix w;//( iter, 1, false );
480       RealMatrix U_old( Teuchos::View, U, iter, iter, 0, 0 );
481       // compute column k in gramian matrix A'A
482       RealMatrix gramian_col( iter, 1, false );
483       gramian_col.multiply( Teuchos::TRANS, Teuchos::NO_TRANS,
484 			    1.0, A, col, 0.0 );
485       substitution_solve( U_old, gramian_col, w, Teuchos::TRANS,
486 			  Teuchos::UPPER_TRI );
487       Real w_norm = w.normFrobenius();
488 
489       if ( col_norm * col_norm + delta - w_norm*w_norm <=
490 	   std::numeric_limits<Real>::epsilon() )
491 	{
492 	  // New column is colinear. That is, it is in the span of the active
493 	  // set
494 	  info = 1;
495 	}
496       else
497 	{
498 	  U(iter,iter) = std::sqrt(( col_norm*col_norm + delta )-w_norm*w_norm );
499 	  RealMatrix U_col( Teuchos::View, U, iter, 1, 0, iter );
500 	  U_col.assign( w );
501 	}
502     }
503   return info;
504 };
505 
givens_rotation(RealVector & x,RealVector & x_rot,RealMatrix & givens_matrix)506 void givens_rotation( RealVector &x, RealVector &x_rot, RealMatrix &givens_matrix )
507 {
508   givens_matrix.reshape( 2, 2 );
509   x_rot.sizeUninitialized( x.length() );
510 
511   if ( x[1] == 0 )
512     {
513       //givens_matrix = I
514       givens_matrix(0,0) = 1.0; givens_matrix(1,1) = 1.0;
515       x_rot.assign( x );
516     }
517   else
518     {
519       Real x_norm = x.normFrobenius();
520 
521       givens_matrix(0,0) = x[0] / x_norm;
522       givens_matrix(0,1) = x[1] / x_norm;
523       givens_matrix(1,0) = -x[1] / x_norm;
524       givens_matrix(1,1) = x[0] / x_norm;
525 
526       x_rot[0] = x_norm;
527       x_rot[1] = 0.0;
528     }
529 };
530 
cholesky_factorization_update_delete_column(RealMatrix & U,int col_index,int N)531 void cholesky_factorization_update_delete_column( RealMatrix &U,
532 						  int col_index,
533 						  int N )
534 {
535   if ( col_index != N - 1 )
536     {
537       // delete column but do not resize the matrix U
538       delete_column( col_index, U, false );
539     };
540 
541   RealVector x( 2, false );
542   for ( int n = col_index; n < N-1; n++ )
543     {
544       RealMatrix givens_matrix;
545       RealVector x_rot;
546       x[0] = U(n,n); x[1] = U(n+1,n);
547       givens_rotation( x, x_rot, givens_matrix );
548       U(n,n) = x_rot[0]; U(n+1,n) = x_rot[1];
549       if ( n < N - 2 )
550 	{
551 	  RealMatrix U_sub( Teuchos::View, U, 2, N - n - 1, n, n + 1 );
552 	  RealMatrix U_sub_rot( U_sub.numRows(), U_sub.numCols(), false );
553 	  U_sub_rot.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
554 			      1.0, givens_matrix, U_sub, 0.0 );
555 	  U_sub.assign( U_sub_rot );
556 	}
557     }
558 
559   // Zero out last row and column of U
560   for ( int m = 0; m < N; m++ ) U(m,N-1) = 0.0;
561   for ( int n = 0; n < N; n++ ) U(N-1,n) = 0.0;
562 };
563 
conjugate_gradients_solve(const RealMatrix & A,const RealVector & b,RealVector & x,Real & relative_residual_norm,int & iters_taken,Real cg_tol,int max_iter,int verbosity)564 int conjugate_gradients_solve( const RealMatrix &A, const RealVector &b, RealVector &x,
565 			       Real &relative_residual_norm,
566 			       int &iters_taken,
567 			       Real cg_tol, int max_iter,
568 			       int verbosity )
569 {
570   int info( 0 );
571 
572   int M( A.numRows() ), N( A.numCols() );
573 
574   if ( x.length() != N )
575     // Initialize to zero
576     x.size( N );
577 
578   RealVector current_x(Teuchos::Copy, x.values(), x.length());
579 
580 
581   Real b_norm = b.normFrobenius();
582   RealVector residual(Teuchos::Copy, b.values(), b.length());
583   residual.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
584 		     -1.0, A, current_x, 1.0 );
585 
586   if ( b_norm < std::numeric_limits<Real>::epsilon() )
587     b_norm = 1.0;
588 
589   relative_residual_norm = residual.normFrobenius() / b_norm ;
590 
591   if ( Teuchos::ScalarTraits<Real>::isnaninf( relative_residual_norm ) )
592     {
593       // At least one of the matrix inputs contains nan of inf
594       if ( verbosity > 2 )
595 	{
596 	  std::stringstream msg;
597 	  msg << "conjugate_gradient_solve() Warning: at least one of the ";
598 	  msg << "matrix inputs contains nan and/or inf.\n";
599 	  std::cout << msg.str();
600 	}
601       info = 3;
602       return info;
603     }
604 
605   if ( relative_residual_norm <= cg_tol )
606     {
607       info = 0;
608       return info;
609     }
610 
611   if ( verbosity > 2 )
612     {
613       std::cout << "CG iteration: " << 0 << ", residual: ";
614       std::cout << relative_residual_norm << "\n";
615     }
616 
617   RealVector p( residual );
618   RealVector Ap( A.numRows() );
619 
620   iters_taken = 0;
621   bool done( false );
622   Real rtr_old( residual.dot( residual ) );
623 
624   while ( !done )
625     {
626       Ap.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
627 		   1.0, A, p, 0.0 );
628 
629       Real ptAp = p.dot( Ap );
630 
631       Real alpha = rtr_old / ptAp;
632 
633       if ( Teuchos::ScalarTraits<Real>::isnaninf( alpha ) || ( ptAp <= 0 ) )
634 	{
635 	  if ( verbosity > 2 )
636 	    {
637 	      // The matrix is not positive definite
638 	      std::stringstream msg;
639 	      msg << "conjugate_gradient_solve() Warning: A is not postive ";
640 	      msg << "definite.\n";
641 	      std::cout << msg.str();
642 	    }
643 	  info = 2;
644 	  return info;
645 	}
646 
647       for ( int n = 0; n < N; n++ )
648 	current_x[n] += alpha * p[n];
649 
650       if ( (iters_taken+1)%50 == 0 )
651 	{
652 	  // Avoid numerical drift due to rounding error
653 	  residual.assign( b );
654 	  residual.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
655 			     -1.0, A, current_x, 1.0 );
656 	}
657       else
658 	{
659 	  for ( int m = 0; m < M; m++ )
660 	    residual[m] -= alpha * Ap[m];
661 	}
662 
663       Real rtr = residual.dot( residual );
664 
665       for ( int m = 0; m < M; m++ )
666 	p[m] = residual[m] + rtr / rtr_old * p[m];
667 
668       rtr_old = rtr;
669 
670       Real current_relative_residual_norm = std::sqrt( rtr_old ) / b_norm;
671 
672       if ( current_relative_residual_norm < relative_residual_norm )
673 	{
674 	  relative_residual_norm = current_relative_residual_norm;
675 	  for ( int n = 0; n < N; n++ )
676 	    x[n] = current_x[n];
677 	}
678 
679       if ( verbosity > 2 )
680 	{
681 	  std::cout << "CG iteration: " << iters_taken + 1<< ", residual: ";
682 	  std::cout <<  current_relative_residual_norm << "\n";
683 	}
684 
685       if ( current_relative_residual_norm < cg_tol )
686 	{
687 	  if ( verbosity > 2 )
688 	    {
689 	      std::stringstream msg;
690 	      msg << "conjugate_gradient_solve() Exiting residual below ";
691 	      msg << "tolerance.\n";
692 	      std::cout << msg.str();
693 	    }
694 	  done = true;
695 	  info = 0;
696 	}
697 
698       iters_taken++;
699       if ( iters_taken == max_iter )
700 	{
701 	  if ( verbosity > 2 )
702 	    {
703 	      std::stringstream msg;
704 	      msg << "conjugate_gradient_solve() Exiting maximum number of ";
705 	      msg << "iterations reached.\n";
706 	      std::cout << msg.str();
707 	    }
708 	  done = true;
709 	  info = 1;
710 	}
711     }
712   return info;
713 };
714 
715 
equality_constrained_least_squares_solve(const RealMatrix & A,const RealVector & b,const RealMatrix & C,const RealVector & d,RealVector & x,int verbosity)716 void equality_constrained_least_squares_solve( const RealMatrix &A,
717 					       const RealVector &b,
718 					       const RealMatrix &C,
719 					       const RealVector &d,
720 					       RealVector &x,
721 					       int verbosity )
722 {
723   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() ),
724     C_copy( Teuchos::Copy, C, C.numRows(), C.numCols() );
725   RealVector b_copy( Teuchos::Copy, b.values(), b.length() ),
726     d_copy( Teuchos::Copy, d.values(), d.length() );
727 
728   int M( A_copy.numRows() ), N( A_copy.numCols() ), lda( A_copy.stride() ),
729     ldc( C_copy.stride() );
730 
731   x.sizeUninitialized( N );
732 
733   Teuchos::LAPACK<int, Real> la;
734   double* work;    // LAPACK work array
735   int info( 0 );   // LAPACK output flag
736   int lwork; // size of LAPACK work array
737   int num_cons( C_copy.numRows() ); // number of equality constraints
738 
739   // Get the optimal work array size
740   lwork = -1; // special code for workspace query
741   work  = new double [1]; // temporary work array
742   la.GGLSE( M, N, num_cons, A_copy.values(), lda,
743 	    C_copy.values(), ldc, b_copy.values(), d_copy.values(), x.values(),
744 	    work, lwork, &info );
745   lwork = (int)work[0]; // optimal work array size returned by query
746   delete [] work;
747   work  = new double [lwork]; // Optimal work array
748 
749   // Least squares computation using LAPACK's DGGLSE subroutine which uses
750   // a GRQ factorization method for solving the eq-constrained LLS problem
751   info = 0;
752   la.GGLSE( M, N, num_cons, A_copy.values(), lda, C_copy.values(),
753 	    ldc, b_copy.values(), d_copy.values(), x.values(),
754 	    work, lwork, &info );
755   delete [] work;
756 
757   if ( info < 0 )
758     {
759       std::stringstream msg;
760       msg << "equality_constrained_least_squares() dgglse failed. ";
761       msg << "The " << std::abs( info ) << "-th argument had an ";
762       msg << "illegal value";
763       throw( std::runtime_error( msg.str() ) );
764     }
765   if ( info == 1 )
766     {
767       std::stringstream msg;
768       msg << "the upper triangular factor R associated with C in the ";
769       msg << "generalized RQ factorization of the pair (C, A) is ";
770       msg << "singular, so that rank(C) < num_cons; the least squares ";
771       msg << "solution could not be computed.";
772       throw( std::runtime_error( msg.str() ) );
773     }
774   if ( info == 2 )
775     {
776       std::stringstream msg;
777       msg << "the (N-P) by (N-P) part of the upper trapezoidal factor ";
778       msg << "T associated with A in the generalized RQ factorization ";
779       msg << "of the pair (C, A) is singular, so that\n";
780       msg << "rank( (A) ) < N; the least squares solution could not\n";
781       msg << "    ( (C) )\n";
782       msg << "be computed.";
783       throw( std::runtime_error( msg.str() ) );
784     }
785 }
786 
cholesky_inverse(RealMatrix & U,RealMatrix & result,Teuchos::EUplo uplo)787 void cholesky_inverse( RealMatrix &U, RealMatrix &result, Teuchos::EUplo uplo  )
788 {
789   Teuchos::LAPACK<int, Real> la;
790   int N = U.numRows();
791   // copy U into result. Lapack will compute result inplace
792   result.shapeUninitialized( N, N );
793   result.assign( U );
794 
795   // extract arguments needed for lapack call
796   int lda( result.stride() );
797   int info( 0 );
798   la.POTRI( Teuchos::EUploChar[uplo], N, result.values(), lda, &info );
799 
800   if ( info < 0 )
801     {
802       std::stringstream msg;
803       msg << "cholesky_inverse() dpotri failed. ";
804       msg << "The " << std::abs( info ) << "-th argument had an ";
805       msg << "illegal value";
806       throw( std::runtime_error( msg.str() ) );
807     }
808   else if ( info > 0 )
809     {
810       std::stringstream msg;
811       msg << "cholesky_inverse() dpotri failed. ";
812       msg << "The (" << info << "," << info << ") element of the factor U or L is ";
813       msg << "zero and the inverse could not be computed";
814       throw( std::runtime_error( msg.str() ) );
815     }
816 
817   // lapack returns the lower/upper triangle of the (symmetric) inverse of A
818   // so we must use symmetry to fill in the entries above/below the diagonal
819   if ( uplo == Teuchos::LOWER_TRI )
820     {
821       for ( int j = 1; j < N; j++ )
822 	for ( int i = 0; i < j; i++ )
823 	  result(i,j) = result(j,i);
824     }
825   else
826     {
827       for ( int j = 1; j < N; j++ )
828 	for ( int i = 0; i < j; i++ )
829 	  result(j,i) = result(i,j);
830     }
831 };
832 
pivoted_qr_factorization(const RealMatrix & A,RealMatrix & Q,RealMatrix & R,IntVector & p)833 void pivoted_qr_factorization( const RealMatrix &A, RealMatrix &Q, RealMatrix &R,
834                                IntVector &p )
835 {
836   Teuchos::LAPACK<int, Real> la;
837 
838   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
839   int M( A.numRows() ), N( A.numCols() ), K( std::min( M, N ) );
840 
841   //Q.shapeUninitialized( M, K );
842   Q.shape( M, K );
843   R.shape( K, N ); // must be initialized to 0
844   p.size( N ); // must be initialized to 0
845 
846   //---------------------------------//
847   // Get the optimal work array size //
848   //---------------------------------//
849 
850   int lwork;   // Size of Teuchos::LAPACK work array
851   Real *work;  // Teuchos::LAPACK work array
852   int info;    // Teuchos::LAPACK output flag
853   int lda = std::max( 1, A_copy.stride() );
854   RealVector tau( K, true );
855 
856   lwork = -1;           // special code for workspace query
857   work  = new Real [1]; // temporary work array
858 
859   DGEQP3_F77( &M, &N, A_copy.values(), &lda, p.values(), tau.values(),
860 	      work, &lwork, &info );
861 
862   lwork = (int)work[0];  // optimal work array size returned by query
863   delete [] work;
864   work  = new Real [lwork]; // Optimal work array
865 
866   //---------------------------------//
867   // Compute the QR factorization    //
868   //---------------------------------//
869 
870   DGEQP3_F77( &M, &N, A_copy.values(), &lda, p.values(), tau.values(),
871 	      work, &lwork, &info );
872 
873   if ( info < 0 )
874     {
875       std::stringstream msg;
876       msg << "privoted_qr_factorization() dgeqp3 failed. ";
877       msg << "The " << std::abs( info ) << "-th argument had an ";
878       msg << "illegal value";
879       throw( std::runtime_error( msg.str() ) );
880     }
881 
882   delete [] work;
883 
884   //---------------------------------//
885   // Form the Q and R matrices       //
886   //---------------------------------//
887 
888   for ( int m = 0; m < K; m++ )
889     {
890       for ( int n = m; n < N; n++ )
891 	R(m,n) = A_copy(m,n);
892     }
893 
894   lwork = -1;           // special code for workspace query
895   work  = new Real [1]; // temporary work array
896 
897   la.ORGQR( M, K, K, A_copy.values(), lda, tau.values(),
898 	   work, lwork, &info );
899 
900   lwork = (int)work[0];  // optimal work array size returned by query
901   delete [] work;
902 
903   work  = new Real [lwork]; // Optimal work array
904 
905   la.ORGQR( M, K, K, A_copy.values(), lda, tau.values(),
906 	   work, lwork, &info );
907 
908   for ( int n = 0; n < K; n++ )
909     {
910       for ( int m = 0; m < M; m++ )
911 	{
912 	  Q(m,n) = A_copy(m,n);
913 	}
914     }
915 
916   // fortran returns indices 1,...,N
917   // c++ requires 0,...,N-1
918   for ( int n = 0; n < N; n++ )
919     p[n]--;
920 
921   delete [] work;
922 }
923 
lu_inverse(const RealMatrix & L,const RealMatrix & U,const IntVector & p,RealMatrix & LU_inv)924 void lu_inverse( const RealMatrix &L, const RealMatrix &U, const IntVector &p,
925 		 RealMatrix &LU_inv )
926 {
927   int M = L.numRows(), N = U.numCols();
928 #ifdef DEBUG
929   if ( M != N )
930     {
931       std::string msg="LU_inverse() The dimensions of L and U are inconsistent";
932       throw( std::runtime_error( msg ) );
933     }
934 #endif
935   LU_inv.shape( M, M );
936   RealMatrix I;
937   eye( M, I );
938   if ( p.length() != 0 )
939     permute_matrix_columns( I, p );
940   RealMatrix X;
941   substitution_solve( L, I, X, Teuchos::NO_TRANS,
942 		      Teuchos::LOWER_TRI, Teuchos::NON_UNIT_DIAG );
943   substitution_solve( U, X, LU_inv, Teuchos::NO_TRANS,
944 		      Teuchos::UPPER_TRI, Teuchos::NON_UNIT_DIAG );
945 };
946 
lu_solve(RealMatrix & A,const RealMatrix & B,RealMatrix & result,bool copy,Teuchos::ETransp trans)947 void lu_solve( RealMatrix &A,
948 	       const RealMatrix &B,
949 	       RealMatrix &result,
950 	       bool copy,
951 	       Teuchos::ETransp trans ){
952   Teuchos::LAPACK<int, Real> la;
953   int M = A.numRows(), N = A.numCols();
954 
955   RealMatrix A_copy;
956   if ( copy )
957     {
958       A_copy.shapeUninitialized( M, N );
959       A_copy.assign( A );
960     }
961 
962   IntVector ipiv( std::min( M, N ), false );
963   int info;
964   if ( copy )
965     la.GETRF( M, N, A_copy.values(), A_copy.stride(), ipiv.values(), &info );
966   else
967     la.GETRF( M, N, A.values(), A.stride(), ipiv.values(), &info );
968 
969   if ( info < 0 )
970     {
971       std::stringstream msg;
972       msg << "GETRF: The " << std::abs(info) <<  "ith argument had " <<
973 	"an illegal value";
974       throw( std::runtime_error( msg.str() ) );
975     }
976   if ( info > 0 )
977     {
978       std::stringstream msg;
979 	msg << "U(" <<  info << "," << info << ") is exactly zero. " <<
980 	"The factorization has been completed, but the factor U is exactly " <<
981 	"singular, and division by zero will occur if it is used " <<
982 	"to solve a system of equations";
983       throw( std::runtime_error( msg.str() ) );
984     }
985   result.shapeUninitialized( B.numRows(), B.numCols() );
986   result.assign( B );
987 
988   if ( copy )
989     la.GETRS( Teuchos::ETranspChar[trans], M, result.numCols(), A_copy.values(),
990 	      A_copy.stride(), ipiv.values(), result.values(), result.stride(),
991 	      &info );
992   else
993     la.GETRS( Teuchos::ETranspChar[trans], M, result.numCols(), A.values(),
994 	      A.stride(), ipiv.values(), result.values(), result.stride(),
995 	      &info );
996 
997 
998   if ( info < 0 )
999     {
1000       std::stringstream msg;
1001       msg << "GETRS: The " <<std::abs(info) <<  "ith argument had " <<
1002 	"an illegal value";
1003       throw( std::runtime_error( msg.str() ) );
1004     }
1005 };
1006 
pivoted_lu_factorization(const RealMatrix & A,RealMatrix & L,RealMatrix & U,IntVector & pivots)1007 void pivoted_lu_factorization( const RealMatrix &A,
1008 			       RealMatrix &L,
1009 			       RealMatrix &U,
1010 			       IntVector &pivots )
1011 {
1012   Teuchos::LAPACK<int, Real> la;
1013 
1014   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1015   int M( A.numRows() ), N( A.numCols() ), K( std::min( M, N ) );
1016 
1017   int info;
1018   IntVector la_pivots( K, false );
1019   la.GETRF( M, N, A_copy.values(), A_copy.stride(), la_pivots.values(), &info );
1020 
1021   if ( info < 0 )
1022     {
1023       std::stringstream msg;
1024       msg << "GETRF: The " << std::abs(info) <<  "ith argument had " <<
1025 	"an illegal value";
1026       throw( std::runtime_error( msg.str() ) );
1027     }
1028   if ( info > 0 )
1029     {
1030       std::stringstream msg;
1031       msg << "U(" <<  info << "," << info << ") is exactly zero. " <<
1032 	"The factorization has been completed, but the factor U is exactly " <<
1033 	"singular, and division by zero will occur if it is used " <<
1034 	"to solve a system of equations";
1035       throw( std::runtime_error( msg.str() ) );
1036     }
1037 
1038   L.shape( M, K );
1039   U.shape( K, N );
1040 
1041   // Fill U
1042   for ( int n = 0; n < N; n++ )
1043     {
1044       if ( n < K ) U(n,n) = A_copy(n,n);
1045       for ( int m = 0; m < std::min(n,K); m++ )
1046 	U(m,n) = A_copy(m,n);
1047     }
1048 
1049   for ( int n = 0; n < K; n++ )
1050     {
1051       L(n,n) = 1.;
1052       for ( int m = n+1; m < M; m++ )
1053 	L(m,n) = A_copy(m,n);
1054     }
1055 
1056   // Lapack does not support integers so must cast.
1057   // TODO write implementation for integers
1058   RealVector dbl_pivots( M, false );
1059   for ( int n = 0; n < dbl_pivots.length(); n++ )
1060     dbl_pivots[n] = (double)n;
1061 
1062   // convert pivots to a format that supports slicing (e.g python) to swap rows
1063   RealVector row_pivots;
1064   pivot_matrix_rows( dbl_pivots, la_pivots, 1, true,  row_pivots );
1065 
1066   pivots.sizeUninitialized( K );
1067   for ( int n = 0; n < K; n++ )
1068     pivots[n] = (int)row_pivots[n];
1069 
1070 }
1071 
1072 
pivoted_cholesky_factorization(RealMatrix & A,RealMatrix & L,IntVector & p,int & rank,double tol)1073 void pivoted_cholesky_factorization( RealMatrix &A, RealMatrix &L, IntVector &p,
1074 				     int &rank, double tol )
1075 {
1076   Teuchos::LAPACK<int, Real> la;
1077 
1078   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1079   int N( A.numRows() );
1080 
1081   L.shape( N, N );
1082   p.size( N ); // must be initialized to 0
1083 
1084   //------------------------------------//
1085   // Compute the cholesky factorization //
1086   //------------------------------------//
1087 
1088   Real *work;  // Teuchos::LAPACK work array
1089   int info;    // Teuchos::LAPACK output flag
1090   int lda = std::max( 1, A_copy.stride() );
1091   rank = 0;
1092 
1093   work  = new Real [2*N]; // temporary work array
1094 
1095   //Teuchos::EUplo uplo
1096   //Teuchos::EUploChar[uplo]
1097   char uplo = 'L';
1098   DPSTRF_F77( &uplo, &N, A_copy.values(),
1099 	      &lda, p.values(), &rank, &tol, work, &info );
1100   delete [] work;
1101 
1102   if ( info < 0 )
1103     {
1104       std::stringstream msg;
1105       msg << "privoted_cholesky_factorization() dpstrf failed. ";
1106       msg << "The " << std::abs( info ) << "-th argument had an ";
1107       msg << "illegal value";
1108       throw( std::runtime_error( msg.str() ) );
1109     }
1110   else if ( info > 0 )
1111     {
1112       std::stringstream msg;
1113       msg << "privoted_cholesky_factorization() dpstrf failed. ";
1114       msg << "The matrix A is either rank deficient with computed rank " << rank
1115 	  << " , or is indefinite.  See Section 7 of "
1116 	  << "LAPACK Working Note #161 for further information.\n";
1117       //std::cout << msg.str();
1118     }
1119 
1120   // Fill upper triangular part of cholesky factor
1121   for ( int m = 0; m < N; m++ )
1122     {
1123     for ( int n = 0; n < m+1; n++ )
1124       L(m,n) = A_copy(m,n);
1125     }
1126 
1127   // fortran returns indices 1,...,N
1128   // c++ requires 0,...,N-1
1129   for ( int n = 0; n < N; n++ )
1130     p[n]--;
1131 }
1132 
cholesky_condition_number(RealMatrix & L)1133 Real cholesky_condition_number( RealMatrix &L )
1134 {
1135   Teuchos::LAPACK<int, Real> la;
1136   int m( L.numRows() );
1137   Real *work = new Real [3*m]; // workspace array
1138   int *iwork = new int [m];  // workspace array
1139   Real rcond = 0.;
1140   int info;
1141   la.POCON( Teuchos::EUploChar[Teuchos::LOWER_TRI], m, L.values(),
1142 	    L.stride(), L.normOne(), &rcond, work, iwork, &info );
1143   delete [] work;
1144   delete [] iwork;
1145   if ( info < 0 )
1146     {
1147       std::stringstream msg;
1148       msg << "cholesky_condition_number() Incorrect arguments specified to "
1149 	  << "POCON()\n";
1150       throw( std::runtime_error( msg.str() ) );
1151     }
1152   return  rcond;
1153 }
1154 
symmetric_eigenvalue_decomposition(const RealMatrix & A,RealVector & eigenvalues,RealMatrix & eigenvectors)1155 void symmetric_eigenvalue_decomposition( const RealMatrix &A,
1156                                          RealVector &eigenvalues,
1157                                          RealMatrix &eigenvectors )
1158 {
1159   Teuchos::LAPACK<int, Real> la;
1160 
1161   int N( A.numRows() );
1162   eigenvectors.shapeUninitialized( N, N );
1163   eigenvectors.assign( A );
1164 
1165   char jobz = 'V'; // compute eigenvectors
1166 
1167   char uplo = 'U'; // assume only upper triangular part of matrix is stored
1168 
1169   eigenvalues.sizeUninitialized( N );
1170 
1171   int info;        // Teuchos::LAPACK output flag
1172   RealVector work; // Teuchos::LAPACK work array;
1173   int lwork = -1;  // Size of Teuchos::LAPACK work array
1174 
1175   // Compute optimal size of work array
1176   work.sizeUninitialized( 1 ); // temporary work array
1177   la.SYEV( jobz, uplo, N, eigenvectors.values(), eigenvectors.stride(),
1178 	   eigenvalues.values(), work.values(), lwork, &info );
1179 
1180   lwork = (int)work[0];
1181   work.sizeUninitialized( lwork );
1182 
1183   la.SYEV( jobz, uplo, N, eigenvectors.values(), eigenvectors.stride(),
1184 	   eigenvalues.values(), work.values(), lwork, &info );
1185 
1186   if ( info > 0 )
1187     {
1188       std::stringstream msg;
1189       msg << "The algorithm failed to converge." << info
1190 	  << " off-diagonal elements of an intermediate tridiagonal "
1191 	  << "form did not converge to zero.";
1192       throw( std::runtime_error( msg.str() ) );
1193     }
1194   else if ( info < 0 )
1195     {
1196       std::stringstream msg;
1197       msg << " The " << std::abs( info ) << " argument had an illegal value.";
1198       throw( std::runtime_error( msg.str() ) );
1199     }
1200 };
1201 
complete_pivoted_lu_factorization(const RealMatrix & A,RealMatrix & L_factor,RealMatrix & U_factor,IntVector & row_pivots,IntVector & column_pivots,int max_iters)1202 void complete_pivoted_lu_factorization( const RealMatrix &A,
1203 					RealMatrix &L_factor,
1204 					RealMatrix &U_factor,
1205 					IntVector &row_pivots,
1206 					IntVector &column_pivots,
1207 					int max_iters )
1208 {
1209   int num_rows = A.numRows(), num_cols = A.numCols();
1210   max_iters = std::min( max_iters, std::min( num_rows, num_cols ) );
1211 
1212   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1213   row_pivots.sizeUninitialized( num_rows );
1214   for ( int i = 0; i < num_rows; i++ )
1215     row_pivots[i] = i;
1216   column_pivots.sizeUninitialized( num_cols );
1217   for ( int j = 0; j < num_cols; j++ )
1218     column_pivots[j] = j;
1219 
1220   RealVector max_col_values( num_cols, false );
1221   IntVector argmax_col_values( num_cols, false );
1222   RealVector temp_row( num_cols, false );
1223   RealVector temp_col( num_rows, false );
1224   for ( int k = 0; k < std::min( num_rows-1, num_cols ); k++ )
1225     {
1226       Real max_pivot = -1.;
1227       int pivot_column = k, pivot_row = k;
1228       max_col_values = -1.;
1229       for ( int j = k; j < num_cols; j++ )
1230 	{
1231 	  for ( int i = k; i < num_rows; i++ )
1232 	    {
1233 	      Real pivot = std::abs( A_copy(i,j) );
1234 	      if ( pivot > max_col_values[j-k] )
1235 		{
1236 		  max_col_values[j-k] = pivot;
1237 		  argmax_col_values[j-k] = i-k;
1238 		  if ( pivot > max_pivot )
1239 		    {
1240 		      max_pivot = pivot;
1241 		      pivot_column = j-k;
1242 		    }
1243 		}
1244 	    }
1245 	}
1246       pivot_row = argmax_col_values[pivot_column]+k;
1247       pivot_column += k;
1248 
1249       // update pivot vectors
1250       int temp_index = row_pivots[k];
1251       row_pivots[k] = row_pivots[pivot_row];
1252       row_pivots[pivot_row] = temp_index;
1253       temp_index = column_pivots[k];
1254       column_pivots[k] = column_pivots[pivot_column];
1255       column_pivots[pivot_column] = temp_index;
1256 
1257       // swap rows
1258       for ( int j = 0; j < num_cols; j++ )
1259 	{
1260 	  Real temp_value = A_copy(k,j);
1261 	  A_copy(k,j) = A_copy(pivot_row,j);
1262 	  A_copy(pivot_row,j) = temp_value;
1263 	}
1264 
1265       // swap columns
1266       for ( int i = 0; i < num_rows; i++ )
1267 	{
1268 	  Real temp_value = A_copy(i,k);
1269 	  A_copy(i,k) = A_copy(i,pivot_column);
1270 	  A_copy(i,pivot_column) = temp_value;
1271 	}
1272 
1273       // check for singularity
1274       if ( std::abs( A_copy(k,k) ) < 1e-10 )
1275 	{
1276 	  std::cout << "pivot " << std::abs( A_copy(k,k) ) << " is to small. "
1277 		    << "Stopping factorization.\n";
1278 	  break;
1279 	}
1280 
1281       // update LU factoization
1282       for ( int i = k+1; i < num_rows; i++ )
1283 	A_copy(i,k) /= A_copy(k,k);
1284 
1285       RealMatrix sub_matrix( Teuchos::View, A_copy, num_rows-k-1, num_cols-k-1,
1286 			     k+1, k+1 );
1287       RealMatrix col_vector( Teuchos::View, A_copy, num_rows-k-1, 1, k+1, k );
1288       RealMatrix row_vector( Teuchos::View, A_copy, 1, num_cols-k-1, k, k+1 );
1289 
1290       sub_matrix.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
1291 			   -1.0, col_vector, row_vector, 1.0 );
1292 
1293       if ( k >= max_iters )
1294 	break;
1295     }
1296 
1297   // final column swap if num_cols > num_rows
1298   if ( num_cols > num_rows && num_cols >= max_iters )
1299     {
1300       // determine pivot which is the maximum entry in A_copy
1301       Real max_entry = 0.;
1302       int pivot_column = num_rows-1;
1303       for ( int j = num_rows-1; j < num_cols; j++ )
1304 	{
1305 	  Real pivot = std::abs( A_copy(num_rows-1,j) );
1306 	  if ( pivot >= max_entry )
1307 	    {
1308 	      max_entry = pivot;
1309 	      pivot_column = j-(num_rows-1);
1310 	    }
1311 	}
1312       pivot_column += num_rows-1;
1313 
1314 
1315       // update pivot column vector
1316       int temp_index = column_pivots[num_rows-1];
1317       column_pivots[num_rows-1] = column_pivots[pivot_column];
1318       column_pivots[pivot_column] = temp_index;
1319 
1320       // swap column
1321       for ( int i = 0; i < num_rows; i++ )
1322 	{
1323 	  Real temp_value = A_copy(i,num_rows-1);
1324 	  A_copy(i,num_rows-1) = A_copy(i,pivot_column);
1325 	  A_copy(i,pivot_column) = temp_value;
1326 	}
1327     }
1328 
1329   // Build L and U matrices
1330   int max_num_rows_cols = std::min( num_rows, num_cols );
1331   L_factor.shape( num_rows, max_num_rows_cols );
1332   U_factor.shape( max_num_rows_cols, num_cols );
1333   for ( int j = 0; j < max_num_rows_cols; j++ )
1334     {
1335       L_factor(j,j) = 1.;
1336       for ( int i = j+1; i < num_rows; i++ )
1337 	L_factor(i,j) = A_copy(i,j);
1338     }
1339 
1340   for ( int j = 0; j < num_cols; j++ )
1341     {
1342       for ( int i = 0; i < std::min( num_rows, j+1 ); i++ )
1343 	U_factor(i,j) = A_copy(i,j);
1344     }
1345 };
1346 
1347 
1348 /*
1349 Does not seem to work
1350 void complete_pivoted_lu_factorization_lapack( RealMatrix &A,
1351 					RealMatrix &L_factor,
1352 					RealMatrix &U_factor,
1353 					IntVector &row_pivots,
1354 					IntVector &column_pivots )
1355 {
1356   RealMatrix A_copy( Teuchos::Copy, A, A.numRows(), A.numCols() );
1357   int M( A.numRows() ), N( A.numCols() );
1358 
1359   if ( M != N )
1360     throw( std::runtime_error("complete_pivoted_lu_factorization: matrix must be square" ) );
1361 
1362   row_pivots.sizeUninitialized( N );
1363   column_pivots.sizeUninitialized( N );
1364   row_pivots = -2;
1365   // dgetc2_ does not update pivots correctly seems to exit early.
1366 
1367   int lda = A_copy.stride(), info;
1368   disp( N );
1369   disp( lda );
1370   DGETC2_F77( &N, A_copy.values(), &lda, row_pivots.values(),
1371 	      column_pivots.values(), &info );
1372 
1373   disp( info );
1374   disp( row_pivots );
1375   if ( info > 0 )
1376     {
1377       std::stringstream msg;
1378       msg << "U(" <<  info-1 << "," << info-1 << ") is likely to produce " <<
1379 	"overflow if we try to solve for x in Ax=b. So U is peturbed to avoid "
1380 	"overflow.\n";
1381       std::cout << msg.str();
1382     }
1383 
1384   L_factor.shape( N, N );
1385   U_factor.shape( N, N );
1386 
1387   // Fill U
1388   for ( int n = 0; n < N; n++ )
1389     {
1390       for ( int m = 0; m < n; m++ )
1391 	U_factor(m,n) = A_copy(m,n);
1392     }
1393 
1394   // Fill L
1395   for ( int n = 0; n < N; n++ )
1396     {
1397       L_factor(n,n) = 1.;
1398       for ( int m = n+1; m < N; m++ )
1399 	L_factor(m,n) = A_copy(m,n);
1400     }
1401 
1402   // fortran returns indices 1,...,N
1403   // c++ requires 0,...,N-1
1404   for ( int n = 0; n < N; n++ )
1405     {
1406       row_pivots[n]--;
1407       column_pivots[n]--;
1408     }
1409 }
1410 */
1411 
pivot_matrix_rows(const RealMatrix & A,const IntVector & pivots,int dir,bool fortran_indexing,RealMatrix & result)1412 void pivot_matrix_rows( const RealMatrix &A, const IntVector &pivots, int dir,
1413 			bool fortran_indexing, RealMatrix &result )
1414 {
1415   result.shapeUninitialized( A.numRows(), A.numCols() );
1416   result.assign( A );
1417   // Add 1 to pivots because we are using fotran indexing
1418   IntVector pivots_copy( pivots.length(), false );
1419   int shift = 1;
1420   if ( fortran_indexing ) shift = 0;
1421   for ( int i = 0; i < pivots.length(); i++ )
1422     pivots_copy[i] = pivots[i]+shift;
1423   // if dir is negative pivots are applied in reverse order
1424   int N = result.numCols(), LDA = result.stride(), K1 = 1,
1425     K2 = pivots_copy.numRows(), INCX = dir;
1426   DLASWP_F77( &N, result.values(), &LDA, &K1, &K2, pivots_copy.values(), &INCX );
1427 }
1428 
1429 
truncated_pivoted_lu_factorization(const RealMatrix & A,RealMatrix & L_factor,RealMatrix & U_factor,IntVector & pivots,int max_iters,int num_initial_rows)1430 void truncated_pivoted_lu_factorization( const RealMatrix &A,
1431 					 RealMatrix &L_factor,
1432 					 RealMatrix &U_factor,
1433 					 IntVector &pivots,
1434 					 int max_iters,
1435 					 int num_initial_rows )
1436 {
1437 
1438   Teuchos::BLAS<int, Real> blas;
1439   int num_rows = A.numRows(), num_cols = A.numCols();
1440   int min_num_rows_cols = std::min( num_rows, num_cols );
1441   max_iters = std::min( max_iters, num_rows );
1442   if ( A.numCols() < max_iters ){
1443     std::string msg = "truncated_pivoted_lu_factorization: ";
1444     msg += " A is inconsistent with max_iters. Try deceasing max_iters or ";
1445     msg += " increasing the number of columns of A";
1446     throw(std::runtime_error( msg ) );
1447   }
1448 
1449   // Use L to store both L and U during factoriation then copy out U in post
1450   // proccesing
1451   L_factor.shapeUninitialized( num_rows, min_num_rows_cols );
1452   L_factor.assign( A );
1453   pivots.sizeUninitialized( num_rows );
1454   for ( int i = 0; i < num_rows; i++ )
1455     pivots[i] = i;
1456 
1457   int iter = 0;
1458   for ( iter = 0; iter < min_num_rows_cols; iter++ )
1459     {
1460       // find best pivot
1461       int pivot =-1;
1462       if ( iter < num_initial_rows ){
1463 	pivot = blas.IAMAX( num_initial_rows-iter, &(L_factor[iter][iter]), 1 )
1464 	  -1+iter;
1465       }else{
1466 	pivot = blas.IAMAX( num_rows-iter, &(L_factor[iter][iter]), 1 )-1+iter;
1467       }
1468 
1469       // update pivots vector
1470       int temp_index = pivots[iter];
1471       pivots[iter] = pivots[pivot];
1472       pivots[pivot] = temp_index;
1473       // apply pivots(swap rows) in L factorization
1474       for ( int j = 0; j < num_cols; j++ ){
1475 	Real temp_value = L_factor(iter,j);
1476 	L_factor(iter,j) = L_factor(pivot,j);
1477 	L_factor(pivot,j) = temp_value;
1478       }
1479 
1480       // check for singularity
1481       if ( std::abs(L_factor(iter,iter))<std::numeric_limits<double>::epsilon()){
1482 	std::cout << "pivot " << std::abs( L_factor(iter,iter) )
1483 		  << " is to small. Stopping factorization.\n";
1484 	break;
1485       }
1486       // update L_factor factoization
1487       for ( int i = iter+1; i < num_rows; i++ )
1488 	L_factor(i,iter) /= L_factor(iter,iter);
1489 
1490       RealMatrix sub_matrix( Teuchos::View, L_factor, num_rows-iter-1,
1491 			     num_cols-iter-1,
1492 			     iter+1, iter+1 );
1493       RealMatrix col_vector( Teuchos::View, L_factor, num_rows-iter-1, 1,
1494 			     iter+1, iter );
1495       RealMatrix row_vector( Teuchos::View, L_factor, 1, num_cols-iter-1, iter,
1496 			     iter+1 );
1497 
1498       sub_matrix.multiply( Teuchos::NO_TRANS, Teuchos::NO_TRANS,
1499 			   -1.0, col_vector, row_vector, 1.0 );
1500       if ( iter >= max_iters-1 )
1501 	break;
1502     }
1503 
1504   // build L and U matrices
1505 
1506   // extract entries of U from L
1507   U_factor.shape( min_num_rows_cols, num_cols );
1508   for ( int j = 0; j < num_cols; j++ )
1509     {
1510       for ( int i = 0; i < std::min( min_num_rows_cols, j+1 ); i++ )
1511 	U_factor(i,j) = L_factor(i,j);
1512     }
1513 
1514   // zero out U entries in L
1515   L_factor.reshape( iter+1, min_num_rows_cols );
1516   for ( int j = 0; j < min_num_rows_cols; j++ )
1517     {
1518       if ( j < iter+1 ) L_factor(j,j) = 1.;
1519       for ( int i = 0; i<std::min(j,iter+1); i++ )
1520 	L_factor(i,j) = 0.;
1521     }
1522   // remove unused pivot entries
1523   pivots.resize( iter+1 );
1524 }
1525 
1526 }  // namespace util
1527 }  // namespace Pecos
1528