1 //////////////////////////////////////////////////////////////////////////////////////////////////
2 // \brief a class to perform solve generalized eigenvalue problem by SPAM
3 //
4 //
5 //
6 //  solve with spam method
7 //////////////////////////////////////////////////////////////////////////////////////////////////
8 
9 #include <vector>
10 #include <string>
11 #include <numeric>
12 #include <cassert>
13 #include <algorithm>
14 #include <cmath>
15 #include <complex>
16 #include <iostream>
17 //#include <mpi.h>
18 
19 #include <boost/format.hpp>
20 #include <boost/shared_ptr.hpp>
21 
22 #include "formic/utils/exception.h"
23 #include "formic/utils/matrix.h"
24 #include "formic/utils/lapack_interface.h"
25 #include "formic/utils/mpi_interface.h"
26 #include "formic/utils/lmyengine/eigen_solver.h"
27 #include "spam_solver.h"
28 
29 //////////////////////////////////////////////////////////////////////////////////////////////////
30 // \brief solve the subspace generalized eigenvalue problem
31 //        with nonsymmetric H and symmetric S for outer loop
32 //
33 //
34 //////////////////////////////////////////////////////////////////////////////////////////////////
35 
solve_subspace_nonsymmetric(const bool outer)36 void cqmc::engine::SpamLMHD::solve_subspace_nonsymmetric(const bool outer)
37 {
38 
39   // one in complex form
40   const std::complex<double> complex_one(1.0, 0.0);
41   const std::complex<double> complex_zero(0.0, 0.0);
42 
43   const int m  = _nkry;
44 
45   // create vectors and matrices used in svd routine
46   formic::Matrix<double> u;
47   formic::Matrix<double> vt;
48   formic::ColVec<double> sin_vals;
49   int truncate_index = 0;
50 
51   // make sure the subspace matrix is not empty
52   if ( outer ) {
53     if ( _subS.rows() == 0 || _subH.rows() == 0 )
54       throw formic::Exception("subspace matrix is empty upon solving subspace eigenvalue problem(outer)");
55   }
56 
57   else {
58     if ( _hy_subS.rows() == 0 || _hy_subH.rows() == 0 )
59       throw formic::Exception("subspace matrix is empty upon solving subspace eigenvalue problem(inner)");
60   }
61 
62   // perform svd to subspace overlap matrix
63   if ( outer )
64     _subS.svd(u, sin_vals, vt);
65   else
66     _hy_subS.svd(u, sin_vals, vt);
67   formic::Matrix<double> v = vt.t();
68 
69   // record the smallest singular value of subspace overlap matrix
70   if ( outer )
71     _smallest_sin_value_outer = std::abs(sin_vals.at(0));
72   else
73     _smallest_sin_value_inner = std::abs(sin_vals.at(0));
74   for (int i = 0; i < m; i++) {
75     if ( outer )
76       _smallest_sin_value_outer = std::min(_smallest_sin_value_outer, std::abs(sin_vals.at(i)));
77     else
78       _smallest_sin_value_inner = std::min(_smallest_sin_value_inner, std::abs(sin_vals.at(i)));
79     truncate_index = i;
80 
81     // check if singular value is smaller than singular value threshold
82     if ( outer && _smallest_sin_value_outer < _singular_value_threshold)
83       break;
84     else if ( !outer && _smallest_sin_value_inner < _singular_value_threshold)
85       break;
86   }
87 
88   // get the number of colums of new U and V matrix by add 1 to truncate index
89   truncate_index ++;
90 
91   // throw away those columns in U and V matrix which corresponds to singular values below the threshold
92   u.conservativeResize(m, truncate_index);
93   v.conservativeResize(m, truncate_index);
94 
95   // throw away those small singular values
96   sin_vals.conservativeResize(truncate_index);
97 
98   // convert the truncated singular value vector to a truncated_index * truncated_index diagonal matrix
99   formic::Matrix<double> trun_sin_val_matrix(truncate_index, truncate_index, 0.0);
100   for (int i = 0; i < truncate_index; i++)
101     trun_sin_val_matrix.at(i,i) = sin_vals.at(i);
102 
103   // calculate the inverse of this matrix
104   for(int i = 0; i < truncate_index; i++){
105     for(int j = 0; j < truncate_index; j++){
106       trun_sin_val_matrix.at(i,j) = (trun_sin_val_matrix.at(i,j) == 0 ? trun_sin_val_matrix.at(i,j) : 1 / trun_sin_val_matrix.at(i,j));
107     }
108   }
109 
110   // calculate matrix S_trun^-1 * U^-1 * H * V
111   formic::Matrix<double> new_sub_H = trun_sin_val_matrix * u.t() * (outer ? _subH : _hy_subH) * v;
112 
113   // set a vector to hold eigenvalues
114   formic::ColVec<std::complex<double> > e_evals;
115 
116   // set an eigen array to hold energies
117   formic::ColVec<std::complex<double> > energy_list;
118 
119   // a matrix to hold eigenvectors
120   formic::Matrix<std::complex<double> > evecs_list;
121 
122   // solve this standard eigenvalue problem ( new_H * y = lambda * y, y = V^T * x, x is the original eigenvector)
123   new_sub_H.nonsym_eig(e_evals, evecs_list);
124 
125   // if we are doing excited state calculations, convert the resulting eigenvalues to energy( this is important in harmonic davidson)
126   if ( !_ground ) {
127     energy_list = _hd_shift * complex_one - complex_one / e_evals;
128   }
129 
130   // if we are doing ground state calculation, then do nothing
131   if ( _ground ) {
132     energy_list = e_evals.clone();
133   }
134 
135   int selected = 0;
136   // if we want to chase the closest, selected the eigenvalue that is real and most similar to the previous eigenvalue( this is essestially an attempt to stay in the same solution)
137   if ( _chase_closest ) { // currently turn on this if statement for spam
138     std::complex<double> closest_energy = energy_list.at(0);
139     std::complex<double> corrs_eval = e_evals.at(0);
140     for (int j = 1; j < truncate_index; j++){
141       // first check if this energy is real
142       if ( std::abs((energy_list.at(j)).imag()) < 1.0e-6 ) {
143 
144         // then select the energy that is closest to the previous one
145         if(std::abs( complex_one * (outer ? _energy_outer : _energy_inner) - energy_list.at(j) ) < std::abs(complex_one * (outer ? _energy_outer : _energy_inner) - closest_energy )){
146           selected = j;
147           closest_energy = energy_list.at(j);
148           corrs_eval = e_evals.at(j);
149         }
150       }
151     }
152 
153     // if the eigenvalue has an imaginary component, we abort
154     _eval_was_complex = false;
155     if( std::abs(closest_energy.imag()) > 1.0e-6 ) {
156       _eval_was_complex = true;
157       return;
158     }
159 
160     // if the eigenvalue is real, we record it and the corresponding eigenvector
161     if ( outer ) {
162       // record energy
163       _energy_outer = closest_energy.real();
164 
165       // record the eigenvalue
166       _sub_eval_outer = corrs_eval.real();
167     }
168 
169     else {
170       // record energy
171       _energy_inner = closest_energy.real();
172 
173       // record the eigenvalue
174       _sub_eval_inner = corrs_eval.real();
175     }
176 
177     // record the eigenvector y
178     _wv4 = evecs_list.col_as_vec(selected);
179 
180     // convert y to x
181     _wv5.reset(m);
182     // convert v to complex form to make sure that all quantities in dgemm call are of the same type
183     formic::Matrix<std::complex<double> > v_complex(v.rows(), v.cols(), complex_zero);
184     for (int i = 0; i < v_complex.rows(); i++) {
185       for (int j = 0; j < v_complex.cols(); j++) {
186         v_complex.at(i,j) = std::complex<double>(v.at(i,j), 0.0);
187       }
188     }
189     formic::xgemm('N', 'N', m, 1, truncate_index, complex_one, &v_complex.at(0,0), m, &_wv4.at(0), truncate_index, complex_zero, &_wv5.at(0), m);
190     //_wv5 = V * _wv4;
191 
192     // take real part of the vector x, put that into eigenvector
193     (outer ? _sub_evec_outer : _sub_evec_inner).reset(m);
194     for (int i = 0; i < m; i++) {
195       (outer ? _sub_evec_outer : _sub_evec_inner)(i) = _wv5.at(i).real();
196     }
197   }
198 
199   // if we want to chase the lowest, select the lowest eigenvalue(currently turn off this if statement for spam)
200   if ( _chase_lowest ) {
201 
202     // the vector that stores all lower-than-current energy(target function)
203     std::vector<std::complex<double> > lower_than_current_list;
204     std::vector<int> lower_than_current_index;
205     //Eigen::ArrayXcd eval_list = (es.eigenvalues()).array();
206     std::complex<double> lowest_eval = e_evals.at(0);
207 
208     double inner_eval = 0.0;
209     if ( !_ground )
210       inner_eval = 1.0 / (_hd_shift - _energy_inner);
211     else
212       inner_eval = _energy_inner;
213 
214     // if it's outer iteration, we just make sure that we choose the lowest energy(target function)
215     //if ( outer ) {
216     for (int j = 1; j < truncate_index; j++) {
217       if (e_evals.at(j).real() < lowest_eval.real()) {
218         selected = j;
219         lowest_eval = e_evals.at(j);
220       }
221     }
222     //}
223 
224     // if it's inner iteration
225     //else {
226     //
227     //  // we first need to get all energy(target function) lower than current
228     //  for (int j = 0; j < truncate_index; j++) {
229     //    if (eval_list(j).real() < inner_eval) {
230     //      lower_than_current_list.push_back(eval_list(j));
231     //      lower_than_current_index.push_back(j);
232     //    }
233     //  }
234 
235     //  lowest_eval = lower_than_current_list.at(0);
236     //  selected = lower_than_current_index.at(0);
237     //  // then we select from lower list the closest energy(target function)
238     //  for (int i = 1; i < lower_than_current_list.size(); i++) {
239     //
240     //    // first check if this energy is real
241     //    if ( std::abs((lower_than_current_list.at(i)).imag()) < 1.0e-6 ) {
242 
243     //      // then select energy
244     //      if ( std::abs( complex_one * inner_eval - lower_than_current_list.at(i) ) < std::abs(complex_one * inner_eval - lowest_eval )) {
245     //        selected = lower_than_current_index.at(i);
246     //        lowest_eval = lower_than_current_list.at(i);
247     //      }
248     //    }
249     //  }
250     //}
251 
252 
253     // if the eigenvalue has an imaginary component, we abort
254     _eval_was_complex = false;
255     if ( std::abs(lowest_eval.imag()) > 1.0e-6 ) {
256       _eval_was_complex = true;
257       return;
258     }
259 
260     // if the eigenvalue is real, we record it
261     if ( !_ground ) {
262       if ( outer )
263         _energy_outer = _hd_shift - 1.0 / lowest_eval.real();
264       else
265         _energy_inner = _hd_shift - 1.0 / lowest_eval.real();
266     }
267 
268     else {
269       if ( outer )
270         _energy_outer = lowest_eval.real();
271       else
272         _energy_inner = lowest_eval.real();
273     }
274 
275     // record the eigenvalue
276     (outer ? _sub_eval_outer : _sub_eval_inner) = lowest_eval.real();
277 
278     // record the eigenvector y
279     _wv4 = evecs_list.col_as_vec(selected);
280 
281     // convert y to x
282     _wv5.reset(m);
283     // convert v to complex form to make sure that all quantities in dgemm call are of the same type
284     formic::Matrix<std::complex<double> > v_complex(v.rows(), v.cols(), complex_zero);
285     for (int i = 0; i < v_complex.rows(); i++) {
286       for (int j = 0; j < v_complex.cols(); j++) {
287         v_complex.at(i,j) = std::complex<double>(v.at(i,j), 0.0);
288       }
289     }
290     formic::xgemm('N', 'N', m, 1, truncate_index, complex_one, &v_complex.at(0,0), m, &_wv4.at(0), truncate_index, complex_zero, &_wv5.at(0), m);
291     //_wv5 = V * _wv4;
292 
293     // take real part of vector x, put that into eigenvector
294     (outer ? _sub_evec_outer : _sub_evec_inner).reset(m);
295     for (int i = 0; i < m; i++) {
296       (outer ? _sub_evec_outer : _sub_evec_inner)(i) = _wv5.at(i).real();
297     }
298   }
299 }
300 
301 ///////////////////////////////////////////////////////////////////////////////
302 // \brief solves the subspace eigenvalue problem
303 //
304 //
305 //
306 ///////////////////////////////////////////////////////////////////////////////
307 
solve_subspace(const bool outer)308 void cqmc::engine::SpamLMHD::solve_subspace(const bool outer)
309 {
310   this -> solve_subspace_nonsymmetric(outer);
311 }
312 
313 /////////////////////////////////////////////////////////////////////////////////
314 // \brief adds a new krylov vector for inner loop of spam
315 //
316 //
317 //
318 //
319 /////////////////////////////////////////////////////////////////////////////////
320 
add_krylov_vector_inner(const formic::ColVec<double> & v)321 void cqmc::engine::SpamLMHD::add_krylov_vector_inner(const formic::ColVec<double> & v)
322 {
323 
324   int my_rank = formic::mpi::rank();
325 
326   // check vector length
327   if (my_rank == 0 && v.size() != _der_rat.cols())
328     throw formic::Exception("bad vector length of %d in SpamLMHD::add_krylov_vector_outer: expected length of %d") % v.size() % _der_rat.cols();
329 
330   // increment krylov subspace size and remember the old size
331   const int nold = _nkry++;
332 
333   _wv1.reset(v.size());
334   _wv1 = v;
335 
336   // perform gram-schmidt orthogonalization against the existing krylov vectors
337   if (my_rank == 0) {
338     for (int i = 0; i < nold; i++) {
339       _wv1 -= dotc(_kvecs.col_as_vec(i), _wv1) * _kvecs.col_as_vec(i);
340     }
341   }
342 
343   // broadcast this vector to all processes
344   formic::mpi::bcast(&_wv1.at(0), _wv1.size());
345 
346   // compute the product of approximate hamiltonian times the new krylov vector
347   formic::ColVec<double> hs(_nfds);
348   this -> HMatVecOp(_wv1, hs, false, true);
349   formic::ColVec<double> hs_avg(hs.size());
350   formic::mpi::reduce(&hs.at(0), &hs_avg.at(0), hs.size(), MPI_SUM);
351   hs = hs_avg.clone();
352 
353   // compute the product of approximate overlap matrix times this new krylov vector
354   this -> SMatVecOp(_wv1, _wv2, true);
355   formic::ColVec<double> _wv2_avg(_wv2.size());
356   formic::mpi::reduce(&_wv2.at(0), &_wv2_avg.at(0), _wv2.size(), MPI_SUM);
357   _wv2 = _wv2_avg.clone();
358 
359 
360   // modify the hamiltonian product to account for "identity shift"
361   if (my_rank == 0) {
362     for (int i = 1; i < hs.size(); i++) {
363       hs.at(i) += _hshift_i * _wv1.at(i);
364     }
365   }
366 
367   // modify hamiltonian product to account for "overlap" shift
368   if (my_rank == 0 && nold > 0) {
369     hs += _hshift_s * _wv2;
370   }
371 
372 
373   // normalize the new krylov vector and save the vector and its operation on overlap matrix
374   if (my_rank == 0) {
375     const double norm = std::sqrt(_wv1.norm2());
376     _wv1 /= norm;
377     _wv2 /= norm;
378     hs /= norm;
379 
380     // krylov space
381     _kvecs.conservativeResize(_nfds, _nkry);
382     std::copy(_wv1.begin(), _wv1.end(), _kvecs.col_begin(_nkry-1));
383 
384     // S_hybrid * krylov space
385     _hsvecs.conservativeResize(_nfds, _nkry);
386 
387     // temp vector that store (S * X)^T * xnew by blas level-2
388     formic::ColVec<double> new_col_s_temp1(_nkry_full);
389     formic::dgemv('T', _nfds, _nkry_full, 1.0, &_svecs.at(0,0), _nfds, &_wv1.at(0), 1, 0.0, &new_col_s_temp1.at(0), 1);
390 
391     // tenp vector that store X * (S * X)^T * xnew + S(1) * xnew
392     formic::ColVec<double> new_col_s_temp2(_nfds);
393     formic::dgemv('N', _nfds, _nkry_full, 1.0, &_kvecs.at(0,0), _nfds, &new_col_s_temp1.at(0), 1, 0.0, &new_col_s_temp2.at(0), 1);
394     //daxpy(_nfds, 1.0, &_wv2(0), 1, &new_col_s_temp2(0), 1);
395     new_col_s_temp2 += _wv2;
396 
397     // temp vector that store X * S(1)*xnew
398     formic::ColVec<double> new_col_s_temp3(_nkry_full);
399     formic::dgemv('T', _nfds, _nkry_full, 1.0, &_kvecs.at(0,0), _nfds, &_wv2.at(0), 1, 0.0, &new_col_s_temp3.at(0), 1);
400 
401     formic::ColVec<double> new_col_s(_nfds);
402     formic::dgemv('N', _nfds, _nkry_full, 1.0, &_kvecs.at(0,0), _nfds, &new_col_s_temp3.at(0), 1, 0.0, &new_col_s.at(0), 1);
403     //daxpy(_nfds, -1.0, &new_col_s(0), 1, &new_col_s_temp2(0), 1);
404     new_col_s = -1.0 * new_col_s + new_col_s_temp2;
405 
406     std::copy(new_col_s.begin(), new_col_s.end(), _hsvecs.col_begin(_nkry-1));
407 
408     // H_hybrid * krylov space
409     _hhvecs.conservativeResize(_nfds, _nkry);
410 
411     // temp vector that store (H^T * X)^T * xnew
412     formic::ColVec<double> new_col_h_temp1(_nkry_full);
413     formic::dgemv('T', _nfds, _nkry_full, 1.0, &_thvecs.at(0,0), _nfds, &_wv1.at(0), 1, 0.0, &new_col_h_temp1.at(0), 1);
414 
415     // temp vector that store X * (H^T * X)^T * xnew + H(1)*xnew
416     formic::ColVec<double> new_col_h_temp2(_nfds);
417     formic::dgemv('N', _nfds, _nkry_full, 1.0, &_kvecs.at(0,0), _nfds, &new_col_h_temp1.at(0), 1, 0.0, &new_col_h_temp2.at(0), 1);
418     //daxpy(_nfds, 1.0, &hs(0), 1, &new_col_h_temp2(0), 1);
419     new_col_h_temp2 += hs;
420 
421     // temp vector that store X^T * H(1)*xnew
422     formic::ColVec<double> new_col_h_temp3(_nkry_full);
423     formic::dgemv('T', _nfds, _nkry_full, 1.0, &_kvecs.at(0,0), _nfds, &hs.at(0), 1, 0.0, &new_col_h_temp3.at(0), 1);
424 
425     formic::ColVec<double> new_col_h(_nfds);
426     formic::dgemv('N', _nfds, _nkry_full, 1.0, &_kvecs.at(0,0), _nfds, &new_col_h_temp3.at(0), 1, 0.0, &new_col_h.at(0), 1);
427     //daxpy(_nfds, -1.0, &new_col_h(0), 1, &new_col_h_temp2(0), 1);
428     new_col_h = -1.0 * new_col_h + new_col_h_temp2;
429 
430     std::copy(new_col_h.begin(), new_col_h.end(), _hhvecs.col_begin(_nkry-1));
431 
432     // update subspace projection of hybrid Hamiltonian
433     _hy_subH.conservativeResize(_nkry, _nkry);
434     _wv3 = _wv1.t() * _hhvecs;
435     _wv6 = _kvecs.t() * new_col_h;
436     std::copy(_wv3.begin(), _wv3.end(), _hy_subH.row_begin(_nkry-1));
437     std::copy(_wv6.begin(), _wv6.end(), _hy_subH.col_begin(_nkry-1));
438     //_hy_subH.bottomRows(1) = _wv3;
439     //_hy_subH.rightCols(1) = _wv6;
440 
441     // update subspace projection of hybrid overlap
442     _hy_subS.conservativeResize(_nkry, _nkry);
443     _wv3 = _wv1.t() * _hsvecs;
444     _wv6 = _kvecs.t() * new_col_s;
445 
446     std::copy(_wv3.begin(), _wv3.end(), _hy_subS.row_begin(_nkry-1));
447     std::copy(_wv6.begin(), _wv6.end(), _hy_subS.col_begin(_nkry-1));
448     //_hy_subS.bottomRows(1) = _wv3;
449     //_hy_subS.rightCols(1) = _wv6;
450 
451     // add this vector to intermediate krylov space
452     _kvecs_about_to_add.conservativeResize(_nfds, _nkry - _nkry_full);
453      std::copy(_wv1.begin(), _wv1.end(), _kvecs_about_to_add.col_begin(_nkry-_nkry_full-1));
454     //_kvecs_about_to_add.rightCols(1) = _wv1;
455   }
456 
457 
458 }
459 
460 ////////////////////////////////////////////////////////////////////////////////////
461 // \brief adds a bunch of new Krylov vectors for spam outer loop
462 //
463 // NOTE: This function assumes that the input vectors are already orthonormal!!
464 //
465 //
466 ////////////////////////////////////////////////////////////////////////////////////
add_krylov_vectors_outer(const formic::Matrix<double> & m)467 void cqmc::engine::SpamLMHD::add_krylov_vectors_outer(const formic::Matrix<double> & m)
468 {
469 
470   int my_rank = formic::mpi::rank();
471 
472   // check matrix size
473   if (my_rank == 0 && m.rows() != _der_rat.cols())
474     throw formic::Exception("bad matrix size of %d in SpamLMHD::add_krylov_vector_outer: expected length of %d") % m.rows() % _der_rat.cols();
475 
476   // get the number of new krylov vectors
477   const int Nnew = m.cols();
478 
479   // remember the old size
480   const int nold = _nkry;
481 
482   // if this is the first krylov vector, we increment the number of krylov vectors by the number of new krylov vectors
483   if ( nold == 0 )
484     _nkry += Nnew;
485 
486   // record the number of krylov vectors that have been multiplied by full hamiltonian and overlap, which is different from the total number of krylov vectors
487   _nkry_full = _nkry;
488 
489   // put input matrix into work matrix
490   _wm1.reset(m.rows(), Nnew);
491   _wm1 = m;
492 
493   // compute the product of Hamiltonian times these new krylov vectors
494   formic::Matrix<double> hs(_nfds, Nnew);
495   this -> HMatMatOp(_wm1, hs, false, false);
496   formic::Matrix<double> hs_avg(_nfds, Nnew);
497   formic::mpi::reduce(&hs.at(0,0), &hs_avg.at(0,0), hs.size(), MPI_SUM);
498   hs = hs_avg.clone();
499 
500   // compute the product of Hamiltonian transpose times these new krylov vectors
501   formic::Matrix<double> ths(_nfds, Nnew);
502   this -> HMatMatOp(_wm1, ths, true, false);
503   formic::Matrix<double> ths_avg(ths.rows(), ths.cols());
504   formic::mpi::reduce(&ths.at(0,0), &ths_avg.at(0,0), ths.size(), MPI_SUM);
505   ths = ths_avg.clone();
506 
507   // compute the product of the overlap matrix times these new krylov vectors
508   this -> SMatMatOp(_wm1, _wm2, false);
509   formic::Matrix<double> _wm2_avg(_wm2.rows(), _wm2.cols());
510   formic::mpi::reduce(&_wm2.at(0,0), &_wm2_avg.at(0,0), _wm2.size(), MPI_SUM);
511   _wm2 = _wm2_avg.clone();
512 
513   // modify hamiltonian product to account for "identity" shift
514   if (my_rank == 0) {
515     for (int i = 1; i < Nnew; i++) {
516       for (int j = 0; j < hs.rows(); j++) {
517         hs.at(j,i) += _hshift_i * _wm1.at(j,i);
518         ths.at(j,i) += _hshift_i * _wm1.at(j,i);
519       }
520     }
521   }
522 
523   // modify hamiltonian product to account for "overlap" shift
524   if (my_rank == 0 && nold > 0) {
525     hs += _hshift_s * _wm2;
526     ths += _hshift_s * _wm2;
527   }
528 
529 
530 
531   // save these krylov vectors and their operation on matrix
532   if ( my_rank == 0 ) {
533 
534     // krylov space
535     if ( nold == 0 ) {
536       _kvecs.conservativeResize(_nfds, _nkry);
537       for (int i = 0; i < Nnew; i++)
538         std::copy(_wm1.col_begin(i), _wm1.col_end(i), _kvecs.col_begin(_nkry-Nnew+i));
539       //_kvecs.rightCols(Nnew) = _wm1;
540     }
541 
542     // H * krylov space
543     _hvecs.conservativeResize(_nfds, _nkry);
544     for (int i = 0; i < Nnew; i++)
545       std::copy(hs.col_begin(i), hs.col_end(i), _hvecs.col_begin(_nkry-Nnew+i));
546     //_hvecs.rightCols(Nnew) = hs;
547 
548     // H_hybrid * krylov space
549     _hhvecs.reset(_nfds, _nkry);
550     _hhvecs = _hvecs.clone();
551 
552     // H^T * krylov space
553     _thvecs.conservativeResize(_nfds, _nkry);
554     for (int i = 0; i < Nnew; i++)
555       std::copy(ths.col_begin(i), ths.col_end(i), _thvecs.col_begin(_nkry-Nnew+i));
556     //_thvecs.rightCols(Nnew) = ths;
557 
558     // S * krylov space
559     _svecs.conservativeResize(_nfds, _nkry);
560     for (int i = 0; i < Nnew; i++)
561       std::copy(_wm2.col_begin(i), _wm2.col_end(i), _svecs.col_begin(_nkry-Nnew+i));
562     //_svecs.rightCols(Nnew) = _wm2;
563 
564     // S_hybrid * krylov space
565     _hsvecs.reset(_nfds, _nkry);
566     _hsvecs = _svecs.clone();
567 
568     // update subspace projection of hamiltonian
569     _subH.conservativeResize(_nkry, _nkry);
570     _wm3 = _wm1.t() * _hvecs;
571     _wm4 = _kvecs.t() * hs;
572     for (int i = 0; i < Nnew; i++)
573       std::copy(_wm4.col_begin(i), _wm4.col_end(i), _subH.col_begin(_nkry-Nnew+i));
574 
575     for (int i = 0; i < Nnew; i++)
576       std::copy(_wm3.col_begin(i), _wm3.col_end(i), _subH.row_begin(_nkry-Nnew+i));
577     //_subH.bottomRows(Nnew) = _wm3;
578     //_subH.rightCols(Nnew) = _wm4;
579 
580     // set the hybrid Hamiltonian subspace projection matrix same as the full hamiltonian
581     _hy_subH.reset(_nkry, _nkry);
582     _hy_subH = _subH.clone();
583 
584     // update subspace projection of the overlap
585     _subS.conservativeResize(_nkry, _nkry);
586     _wm3 = _wm1.t() * _svecs;
587     _wm4 = _kvecs.t() * _wm2;
588     for (int i = 0; i < Nnew; i++)
589       std::copy(_wm4.col_begin(i), _wm4.col_end(i), _subS.col_begin(_nkry-Nnew+i));
590 
591     for (int i = 0; i < Nnew; i++)
592       std::copy(_wm3.col_begin(i), _wm3.col_end(i), _subS.row_begin(_nkry-Nnew+i));
593     //_subS.bottomRows(Nnew) = _wm3;
594     //_subS.rightCols(Nnew) = _wm4;
595 
596     // set the hybrid overlap subspace projection matrix same as the full overlap
597     _hy_subS.reset(_nkry, _nkry);
598     _hy_subS = _subS.clone();
599 
600   }
601 
602 }
603 
604 
605 ////////////////////////////////////////////////////////////////////////////////////
606 // \brief function that perfoms hamiltonian matrix-vector multiplication
607 //
608 // \param[in]   x              input vector
609 // \param[in]   matrix_built   whether we have already built the matrix or not
610 // \param[in]   transpose      whether to use transposed hamiltonian or not
611 // \param[in]   approximate    whether to use approximated hamiltonian or not
612 // \param[out]  y              result vector
613 //
614 ////////////////////////////////////////////////////////////////////////////////////
615 
HMatVecOp(const formic::ColVec<double> & x,formic::ColVec<double> & y,const bool transpose,const bool approximate)616 void cqmc::engine::SpamLMHD::HMatVecOp(const formic::ColVec<double> & x, formic::ColVec<double> & y, const bool transpose, const bool approximate)
617 {
618   // size the resulting vector correctly
619   y.reset(x.size());
620 
621 
622   // the number of samples on each process
623   int Ns = _le_der.rows();
624 
625   // if we multiply by approximated matrix, we need to change Ns by approximate degree
626   if ( approximate )
627     Ns /= _appro_degree;
628 
629   // the number of independent variables
630   const int Nind = _le_der.cols();
631 
632   // check whether derivative vector matrices have the same size
633   if ( _le_der.rows() != _der_rat.rows() && _le_der.cols() != _der_rat.cols())
634     throw formic::Exception("the input derivative vector matrices are of different size!");
635 
636   // if we are doing ground state calculation
637   if ( _ground && !approximate ) {
638 
639     // if we do H*x
640     if ( !transpose ) {
641 
642       // temp vector to store le_der * x
643       formic::ColVec<double> temp(Ns);
644 
645       // call blas level-2 function
646       formic::dgemv('N', Ns, Nind, 1.0, &_le_der.at(0,0), Ns, &x.at(0), 1, 0.0, &temp.at(0), 1);
647 
648       // call blas level-2 function
649       formic::dgemv('T', Ns, Nind, 1.0, &_der_rat.at(0,0), Ns, &temp.at(0), 1, 0.0, &y.at(0), 1);
650 
651       return;
652     }
653 
654     // if we do H^T * x
655     else {
656 
657       // temp vector that store der_rat * x
658       formic::ColVec<double> temp(Ns);
659 
660       // call blas level-2 function
661       formic::dgemv('N', Ns, Nind, 1.0, &_der_rat.at(0,0), Ns, &x.at(0), 1, 0.0, &temp.at(0), 1);
662 
663       // call blas level-2 function
664       formic::dgemv('T', Ns, Nind, 1.0, &_le_der.at(0,0), Ns, &temp.at(0), 1, 0.0, &y.at(0), 1);
665 
666       return;
667     }
668   }
669 
670 
671   // if we are doing ground state calculation and multiply by approximate matrix
672   else if ( _ground && approximate ) {
673 
674     // if we want H * x
675     if ( !transpose ) {
676 
677       // temp vector that stores le_der_appro * x
678       formic::ColVec<double> temp(Ns);
679 
680       // call blas level-2 function
681       formic::dgemv('N', Ns, Nind, _appro_factor, &_le_der_appro.at(0,0), Ns, &x.at(0), 1, 0.0, &temp.at(0), 1);
682 
683       // call blas level-2 function
684       formic::dgemv('T', Ns, Nind, 1.0, &_der_rat_appro.at(0,0), Ns, &temp.at(0), 1, 0.0, &y.at(0), 1);
685 
686       return;
687     }
688 
689     // if we want H^T * x
690     else {
691 
692       // temp vector that stores der_rat * x
693       formic::ColVec<double> temp(Ns);
694 
695       // call blas level-2 function
696       formic::dgemv('N', Ns, Nind, _appro_factor, &_der_rat_appro.at(0,0), Ns, &x.at(0), 1, 0.0, &temp.at(0), 1);
697 
698       // call blas level-2 function
699       formic::dgemv('T', Ns, Nind, 1.0, &_le_der_appro.at(0,0), Ns, &temp.at(0), 1, 0.0, &y.at(0), 1);
700 
701       return;
702     }
703   }
704 
705   // if we are doing excited state calculation and full matrix
706   else if ( !_ground && !approximate ) {
707 
708     // if we want H*x
709     if ( !transpose ) {
710 
711       // temp vectpr to store omega * der_rat * x
712       formic::ColVec<double> temp1(Ns);
713 
714       // temp vector to store le_der * x
715       formic::ColVec<double> temp2(Ns);
716 
717       // call blas level-2 function
718       formic::dgemv('N', Ns, Nind, _hd_shift, &_der_rat.at(0,0), Ns, &x.at(0), 1, 0.0, &temp1.at(0), 1);
719 
720       // call blas level-2 function
721       formic::dgemv('N', Ns, Nind, 1.0, &_le_der.at(0,0), Ns, &x.at(0), 1, 0.0, &temp2.at(0), 1);
722 
723       // combine these two temp vector together
724       temp1 -= temp2;
725 
726       // left multiply by _der_rat^T
727       formic::dgemv('T', Ns, Nind, 1.0, &_der_rat.at(0,0), Ns, &temp1.at(0), 1, 0.0, &y.at(0), 1);
728 
729       return;
730     }
731 
732     // if we want H^T * x
733     else {
734 
735       // temp vector that store _der_rat * x
736       formic::ColVec<double> temp1(Ns);
737 
738       // call blas level-2 function
739       formic::dgemv('N', Ns, Nind, 1.0, &_der_rat.at(0,0), Ns, &x.at(0), 1, 0.0, &temp1.at(0), 1);
740 
741       // temp vector that store _le_der^T * _der_rat * x
742       formic::ColVec<double> temp2(Nind);
743 
744       // call blas level-2 function
745       formic::dgemv('T', Ns, Nind, 1.0, &_le_der.at(0,0), Ns, &temp1.at(0), 1, 0.0, &temp2.at(0), 1);
746 
747       // call bals level-2 function
748       formic::dgemv('T', Ns, Nind, _hd_shift, &_der_rat.at(0,0), Ns, &temp1.at(0), 1, 0.0, &y.at(0), 1);
749 
750       // get the resulting vector
751       y -= temp2;
752 
753       return;
754     }
755   }
756 
757   // if we are doing excited state calculation and approximate matrix
758   else if ( !_ground && approximate ) {
759 
760     // if we want H*x
761     if ( !transpose ) {
762 
763       // temp vector that stores omega * der_rat * x
764       formic::ColVec<double> temp1(Ns);
765 
766       // temp vector that stores le_der * x
767       formic::ColVec<double> temp2(Ns);
768 
769       // call blas level-2 function
770       formic::dgemv('N', Ns, Nind, _hd_shift, &_der_rat_appro.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp1.at(0), 1);
771 
772       // call blas level-2 function
773       formic::dgemv('N', Ns, Nind, 1.0, &_le_der_appro.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp2.at(0), 1);
774 
775       // combine these two temp vector together
776       temp1 -= temp2;
777 
778       // left multiply by _der_rat_^T
779       formic::dgemv('T', Ns, Nind, _appro_factor, &_der_rat_appro.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &y.at(0), 1);
780 
781       return;
782     }
783 
784     // if we want H^T * x
785     else {
786 
787       // temp vector that store _der_rat * x
788       formic::ColVec<double> temp1(Ns);
789 
790       // call blas level-2 function
791       formic::dgemv('N', Ns, Nind, _appro_factor, &_der_rat_appro.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp1.at(0), 1);
792 
793       // temp vector that store _le_der^T * _der_rat * x
794       formic::ColVec<double> temp2(Nind);
795 
796       // call blas level-2 function
797       formic::dgemv('T', Ns, Nind, 1.0, &_le_der_appro.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &temp2.at(0), 1);
798 
799       // call bals level-2 function
800       formic::dgemv('T', Ns, Nind, _hd_shift, &_der_rat_appro.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &y.at(0), 1);
801 
802       // get the resulting vector
803       y -= temp2;
804 
805       return;
806     }
807   }
808 }
809 
810 ////////////////////////////////////////////////////////////////////////////////////
811 // \brief function that performs hamiltonian matrix-matrix multiplication
812 //
813 // \param[in]   x              input matrix
814 // \param[in]   matrix_built   whether we have already built the matrix or not
815 // \param[in]   transpose      whether to use the transposed hamiltonian or not
816 // \param[in]   approximate    whether to use approximated hamiltonian or not
817 // \param[out]  y              result matrix
818 //
819 ////////////////////////////////////////////////////////////////////////////////////
820 
HMatMatOp(const formic::Matrix<double> & x,formic::Matrix<double> & y,const bool transpose,const bool approximate)821 void cqmc::engine::SpamLMHD::HMatMatOp(const formic::Matrix<double> & x, formic::Matrix<double> & y, const bool transpose, const bool approximate)
822 {
823 
824   // size the resulting matrix correctly
825   y.reset(x.rows(), x.cols());
826 
827   // the number of samples on each process
828   int Ns = _le_der.rows();
829 
830   // the number of independent variables
831   int Nind = _le_der.cols();
832 
833   // the number of new krylov vectors
834   int Nnew = x.cols();
835 
836   // if the approximate flag is set to be true, throw out an error
837   if ( approximate )
838     throw formic::Exception("Matrix-Matrix multiplication doesn't support appriximate matrix");
839 
840   // check to see whether derivative vector matrices have the same size
841   if ( _le_der.rows() != _der_rat.rows() && _le_der.cols() != _der_rat.cols())
842     throw formic::Exception("the input derivative vector %d by %d and %d by %d matrices are of different size!") % _le_der.rows() % _le_der.cols() % _der_rat.rows() % _der_rat.cols();
843 
844   // if we are doing ground state calculation
845   if ( _ground ) {
846 
847     // if we do H*x
848     if ( !transpose ) {
849 
850       // temp matrix to store le_der * x
851       formic::Matrix<double> temp(Ns, Nnew);
852 
853       // call blas level-3 function
854       formic::dgemm('N', 'N', Ns, Nnew, Nind, 1.0, &_le_der.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp.at(0, 0), Ns);
855 
856       // call blas level-3 function
857       formic::dgemm('T', 'N', Nind, Nnew, Ns, 1.0,  &_der_rat.at(0, 0), Ns, &temp.at(0, 0), Ns, 0.0, &y.at(0, 0), Nind);
858 
859       return;
860     }
861 
862     // if we do H^T * x
863     else {
864 
865       // temp mattrix that stores der_rat * x
866       formic::Matrix<double> temp(Ns, Nnew);
867 
868       // call blas level-3 function
869       formic::dgemm('N', 'N', Ns, Nnew, Nind, 1.0, &_der_rat.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp.at(0, 0), Ns);
870 
871       // call blas level-3 function
872       formic::dgemm('T', 'N', Nind, Nnew, Ns, 1.0, &_le_der.at(0, 0), Ns, &temp.at(0, 0), Ns, 0.0, &y.at(0, 0), Nind);
873 
874       return;
875     }
876   }
877 
878   // if we are doing excited state calculation
879   else if ( !_ground ) {
880 
881     // if we want H*x
882     if ( !transpose ) {
883 
884       // temp matrix that stores omega * der_rat * x
885       formic::Matrix<double> temp1(Ns, Nnew);
886 
887       // temp matrix that stores le_der * x
888       formic::Matrix<double> temp2(Ns, Nnew);
889 
890       // call blas level-3 function
891       formic::dgemm('N', 'N', Ns, Nnew, Nind, _hd_shift, &_der_rat.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp1.at(0, 0), Ns);
892 
893       // call blas level-3 function
894       formic::dgemm('N', 'N', Ns, Nnew, Nind, 1.0, &_le_der.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp2.at(0, 0), Ns);
895 
896       // combine these two temp matrices together
897       temp1 -= temp2;
898 
899       // left multiply by _der_rat^T
900       formic::dgemm('T', 'N', Nind, Nnew, Ns, 1.0, &_der_rat.at(0, 0), Ns, &temp1.at(0, 0), Ns, 0.0, &y.at(0, 0), Nind);
901 
902       return;
903     }
904 
905     // if we want H^T*x
906     else {
907 
908       // temp vector that stored _der_rat * x
909       formic::Matrix<double> temp1(Ns, Nnew);
910 
911       // call blas level-3 function
912       formic::dgemm('N', 'N', Ns, Nnew, Nind, 1.0, &_der_rat.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp1.at(0, 0), Ns);
913 
914       // temp matrix that stores _le_der^T * _der_rat * x
915       formic::Matrix<double> temp2(Nind, Nnew);
916 
917       // call blas level-3 function
918       formic::dgemm('T', 'N', Nind, Nnew, Ns, 1.0, &_le_der.at(0, 0), Ns, &temp1.at(0, 0), Ns, 0.0, &temp2.at(0, 0), Nind);
919 
920       // call blas level-3 function
921       formic::dgemm('T', 'N', Nind, Nnew, Ns, _hd_shift, &_der_rat.at(0, 0), Ns, &temp1.at(0, 0), Ns, 0.0, &y.at(0, 0), Nind);
922 
923       // get the resulting vector
924       y -= temp2;
925 
926       return;
927     }
928   }
929 }
930 
931 
932 ////////////////////////////////////////////////////////////////////////////////////
933 // \brief function that performs overlap matrix-vector multiplication
934 //
935 // \param[in]   x              input vector
936 // \param[in]   matrix_built   whether we have already built the matrix or not
937 // \param[in]   approximate    whether to use approximated overlap or not
938 // \param[out]  y              result vector
939 // NOTE: Unlike hamiltonian matrix-vector multiplication function, no transpose flag
940 //       in this function because overlap matrix is assumed to be symmetric
941 //
942 ////////////////////////////////////////////////////////////////////////////////////
943 
SMatVecOp(const formic::ColVec<double> & x,formic::ColVec<double> & y,const bool approximate)944 void cqmc::engine::SpamLMHD::SMatVecOp(const formic::ColVec<double> & x, formic::ColVec<double> & y, const bool approximate)
945 {
946 
947   // size the resulting vector correctly
948   y.reset(x.size());
949 
950   // since we do not have the matrix, then we need to do der_rat * le_der * x on each process
951   // the matrix free implementation is a little bit complicated, I will explain it here
952   // we have two derivative vector matrox, L(i, j) = <i|H|[psi_j>/<i|psi>, D(i, j) = <i|psi_j>/<i|psi>
953   // i denotes configuration(electron position in real space and number vector in Hilbert space)
954   // psi_j denotes wfn derivative w.r.t. jth variable, and j=0 means undifferentiated wfn
955   // note that |value/guiding|^2 and weights should be absorbed in L and D matrix
956   // in ground state calculation, H = D^T * D, Hx = D^T * Dx
957   // in excited state calculation, H = (omega * D - L)^T * (omega * D - L), temp1 = omage * D * x
958   // temp2 = Lx
959 
960   // number of samples
961   int Ns = _le_der.rows();
962 
963   // the number of independent variables + 1
964   const int Nind = _le_der.cols();
965 
966   // check to see whether derivative vector matrices have the same size
967   if ( _le_der.rows() != _der_rat.rows() && _le_der.cols() != _der_rat.cols() )
968     throw formic::Exception("input derivative vectors are of different size");
969 
970   // modify number of samples based on approximate degree
971   if ( approximate )
972     Ns /= _appro_degree;
973 
974   // if we are doing ground state calculation
975   if ( _ground && !approximate ) {
976 
977     // temp vector that store _der_rat * x
978     formic::ColVec<double> temp(Ns);
979 
980     // call blas level-2 function
981     formic::dgemv('N', Ns, Nind, 1.0, &_der_rat.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp.at(0), 1);
982 
983     // call blas levec-2 function
984     formic::dgemv('T', Ns, Nind, 1.0, &_der_rat.at(0, 0), Ns, &temp.at(0), 1, 0.0, &y.at(0), 1);
985 
986     return;
987   }
988 
989   // if we multiply by approximate matrix
990   else if ( _ground && approximate ) {
991 
992     // temp vector that stores _der_rat * x
993     formic::ColVec<double> temp(Ns);
994 
995     // call blas level-2 function
996     formic::dgemv('N', Ns, Nind, 1.0, &_der_rat_appro.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp.at(0), 1);
997 
998     // call blas level-2 function
999     formic::dgemv('T', Ns, Nind, _appro_factor, &_der_rat_appro.at(0, 0), Ns, &temp.at(0), 1, 0.0, &y.at(0), 1);
1000 
1001     return;
1002   }
1003 
1004   else if ( !_ground && !approximate ) {
1005 
1006     // temp vectpr to store omega * der_rat * x
1007     formic::ColVec<double> temp1(Ns);
1008 
1009     // temp vector to store le_der * x
1010     formic::ColVec<double> temp2(Ns);
1011 
1012     // temp vector that store omega * der_rat^T * (omega * der_rat - le_der) * x
1013     formic::ColVec<double> temp3(x.size());
1014 
1015     // call blas level-2 function
1016     formic::dgemv('N', Ns, Nind, _hd_shift, &_der_rat.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp1.at(0), 1);
1017 
1018     // call blas level-2 function
1019     formic::dgemv('N', Ns, Nind, 1.0, &_le_der.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp2.at(0), 1);
1020 
1021     // combine these two temp vector together
1022     temp1 -= temp2;
1023 
1024     // omega * D^T * (omega * D - L) * x
1025     formic::dgemv('T', Ns, Nind, _hd_shift, &_der_rat.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &y.at(0), 1);
1026 
1027     // L^T * (omega * D - L) * x
1028     formic::dgemv('T', Ns, Nind, 1.0, &_le_der.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &temp3.at(0), 1);
1029 
1030     // (omega * D^T - L^T) * (omega * D - L) * x
1031     y -= temp3;
1032 
1033     return;
1034   }
1035 
1036   // if we multiply by approximate matrix
1037   else if ( !_ground && approximate ) {
1038 
1039     // temp vectpr to store omega * der_rat * x
1040     formic::ColVec<double> temp1(Ns);
1041 
1042     // temp vector to store le_der * x
1043     formic::ColVec<double> temp2(Ns);
1044 
1045     // temp vector that store omega * der_rat^T * (omega * der_rat - le_der) * x
1046     formic::ColVec<double> temp3(x.size());
1047 
1048     // call blas level-2 function
1049     formic::dgemv('N', Ns, Nind, _hd_shift, &_der_rat_appro.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp1.at(0), 1);
1050 
1051     // call blas level-2 function
1052     formic::dgemv('N', Ns, Nind, 1.0, &_le_der_appro.at(0, 0), Ns, &x.at(0), 1, 0.0, &temp2.at(0), 1);
1053 
1054     // combine these two temp vector together
1055     temp1 -= temp2;
1056 
1057     // omega * D^T * (omega * D - L) * x
1058     formic::dgemv('T', Ns, Nind, _hd_shift, &_der_rat_appro.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &y.at(0), 1);
1059 
1060     // L^T * (omega * D - L) * x
1061     formic::dgemv('T', Ns, Nind, 1.0, &_le_der_appro.at(0, 0), Ns, &temp1.at(0), 1, 0.0, &temp3.at(0), 1);
1062 
1063     // (omega * D^T - L^T) * (omega * D - L) * x
1064     y -= temp3;
1065 
1066     // account for approximation prefactor
1067     y *= _appro_factor;
1068 
1069     return;
1070   }
1071 }
1072 
1073 ////////////////////////////////////////////////////////////////////////////////////
1074 // \brief function that performs overlap matrix-matrix multiplication
1075 //
1076 // \param[in]   x              input matrix
1077 // \param[in]   matrix_built   whether we have already built the matrix or not
1078 // \param[in]   approximate    whether to use approximated overlap or not
1079 // \param[out]  y              result matrix
1080 // NOTE: Unlike hamiltonian matrix-matrix multiplication function, no transpose flag
1081 //       in this function because overlap matrix is assumed to be symmetric
1082 //
1083 ////////////////////////////////////////////////////////////////////////////////////
1084 
SMatMatOp(const formic::Matrix<double> & x,formic::Matrix<double> & y,const bool approximate)1085 void cqmc::engine::SpamLMHD::SMatMatOp(const formic::Matrix<double> & x, formic::Matrix<double> & y, const bool approximate)
1086 {
1087   // size the resulting matrix correctly
1088   y.reset(x.rows(), x.cols());
1089 
1090   // the number of samples on each process
1091   int Ns = _le_der.rows();
1092 
1093   // the number of independent variables
1094   int Nind = _le_der.cols();
1095 
1096   // the number of new krylov vectors
1097   int Nnew = x.cols();
1098 
1099   // if the approximate flag is set to be true, throw out an error
1100   if ( approximate )
1101     throw formic::Exception("Matrix-Matrix multiplication doesn't support approximate matrix");
1102 
1103   // check to see whether derivative vector matrices have the same size
1104   if ( _le_der.rows() != _der_rat.rows() && _le_der.cols() != _der_rat.cols())
1105     throw formic::Exception("the input derivative vector matrices are of different size!");
1106 
1107   // if we are doing ground state calculation
1108   if ( _ground ) {
1109 
1110     // temp matrix that stores _der_rat * x
1111     formic::Matrix<double> temp(Ns, Nnew);
1112 
1113     // call blas level-3 function
1114     formic::dgemm('N', 'N', Ns, Nnew, Nind, 1.0, &_der_rat.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp.at(0, 0), Ns);
1115 
1116     // call blas level-3 function
1117     formic::dgemm('T', 'N', Nind, Nnew, Ns, 1.0, &_der_rat.at(0, 0), Ns, &temp.at(0, 0), Ns, 0.0, &y.at(0, 0), Nind);
1118 
1119     return;
1120   }
1121 
1122   // if we are doing excited state calculation
1123   else {
1124 
1125     // temp matrix that stores omega * der_rat * x
1126     formic::Matrix<double> temp1(Ns, Nnew);
1127 
1128     // temp matrix that stores le_der * x
1129     formic::Matrix<double> temp2(Ns, Nnew);
1130 
1131     // temp matrix that stores omega * der_rat^T * (omega * der_rat - le_der) * x
1132     formic::Matrix<double> temp3(Nind, Nnew);
1133 
1134     // call blas level-3 function
1135     formic::dgemm('N', 'N', Ns, Nnew, Nind, _hd_shift, &_der_rat.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp1.at(0, 0), Ns);
1136 
1137     // call blas level-3 function
1138     formic::dgemm('N', 'N', Ns, Nnew, Nind, 1.0, &_le_der.at(0, 0), Ns, &x.at(0, 0), Nind, 0.0, &temp2.at(0, 0), Ns);
1139 
1140     // combine these two temp vectors together
1141     temp1 -= temp2;
1142 
1143     // omega * D^T * (omega * D - L) * x
1144     formic::dgemm('T', 'N', Nind, Nnew, Ns, _hd_shift, &_der_rat.at(0, 0), Ns, &temp1.at(0, 0), Ns, 0.0, &y.at(0, 0), Nind);
1145 
1146     // L^T * (omega * D - L) * x
1147     formic::dgemm('T', 'N', Nind, Nnew, Ns, 1.0, &_le_der.at(0, 0), Ns, &temp1.at(0, 0), Ns, 0.0, &temp3.at(0, 0), Nind);
1148 
1149     // (omega * D^T - L^T) * (omega * D - L) * x
1150     y -= temp3;
1151 
1152     return;
1153   }
1154 }
1155 
1156 
1157 /////////////////////////////////////////////////////////////////////////////////
1158 // \brief constructor
1159 //
1160 //
1161 //
1162 /////////////////////////////////////////////////////////////////////////////////
1163 
SpamLMHD(const formic::VarDeps * dep_ptr,const int nfds,const int lm_krylov_iter,const int inner_maxIter,const int appro_degree,const bool inner_print,const double lm_eigen_thresh,const double lm_min_S_eval,const double appro_factor,const bool var_deps_use,const bool chase_lowest,const bool chase_closest,const bool ground,const std::vector<double> & vf,const double init_energy,const double hd_shift,const double lm_max_e_change,const double total_weight,const double vgsa,formic::Matrix<double> & der_rat,formic::Matrix<double> & le_der,formic::Matrix<double> & der_rat_appro,formic::Matrix<double> & le_der_appro)1164 cqmc::engine::SpamLMHD::SpamLMHD(const formic::VarDeps* dep_ptr,
1165                                  const int nfds,
1166                                  const int lm_krylov_iter,
1167                                  const int inner_maxIter,
1168                                  const int appro_degree,
1169                                  const bool inner_print,
1170                                  const double lm_eigen_thresh,
1171                                  const double lm_min_S_eval,
1172                                  const double appro_factor,
1173                                  const bool var_deps_use,
1174                                  const bool chase_lowest,
1175                                  const bool chase_closest,
1176                                  const bool ground,
1177                                  const std::vector<double>& vf,
1178                                  const double init_energy,
1179                                  const double hd_shift,
1180                                  const double lm_max_e_change,
1181                                  const double total_weight,
1182                                  const double vgsa,
1183                                  formic::Matrix<double> & der_rat,
1184                                  formic::Matrix<double> & le_der,
1185                                  formic::Matrix<double> & der_rat_appro,
1186                                  formic::Matrix<double> & le_der_appro)
1187 :EigenSolver<double>(dep_ptr,
1188                   nfds,
1189                   lm_eigen_thresh,
1190                   var_deps_use,
1191                   chase_lowest,
1192                   chase_closest,
1193                   ground,
1194                   false,
1195                   vf,
1196                   init_energy,
1197                   0.0,
1198                   hd_shift,
1199                   0.0,
1200                   lm_max_e_change,
1201                   total_weight,
1202                   vgsa,
1203                   der_rat,
1204                   le_der),
1205       _nkry(0),
1206       _nkry_full(0),
1207       _n_max_iter(lm_krylov_iter),
1208       _inner_maxIter(inner_maxIter),
1209       _appro_degree(appro_degree),
1210       _inner_print(inner_print),
1211       _smallest_sin_value_inner(0.0),
1212       _smallest_sin_value_outer(0.0),
1213       _singular_value_threshold(lm_min_S_eval),
1214       _init_energy(init_energy),
1215       _energy_outer(init_energy),
1216       _energy_inner(init_energy),
1217       _appro_factor(appro_factor),
1218       _der_rat_appro(der_rat_appro),
1219       _le_der_appro(le_der_appro)
1220 {
1221 }
1222 
1223 /////////////////////////////////////////////////////////////////////////////////////
1224 // \brief solves the eigenvalue problem via the normal davidson method
1225 //
1226 //
1227 //
1228 /////////////////////////////////////////////////////////////////////////////////////
1229 
iterative_solve(double & eval,std::ostream & output)1230 bool cqmc::engine::SpamLMHD::iterative_solve(double & eval, std::ostream & output)
1231 {
1232 
1233   int my_rank = formic::mpi::rank();
1234 
1235   // initialize the solution vector to the unit vector along the first direction
1236   _evecs.reset( ( _var_deps_use ? 1 + _dep_ptr->n_tot() : _nfds ), 0.0 );
1237   _evecs.at(0) = 1.0;
1238 
1239   // ensure that at least one vector is in the outer krylov subspace
1240   if ( my_rank == 0 && _nkry == 0)
1241     throw formic::Exception("Empty krylov subspace upon entry to iterative_solve. Did you forget to add the initial vector?");
1242 
1243   // return value, whether the solver is successful or not
1244   bool retval = true;
1245 
1246   // converge flag(outer)
1247   bool converged_outer = false;
1248 
1249   // converge flag(inner)
1250   bool converged_inner = false;
1251 
1252   // best outer residual
1253   double _best_residual_outer = 1.0e100;
1254 
1255   // best inner residual
1256   double _best_residual_inner = 1.0e100;
1257 
1258   // times of outer iteration
1259   int iter_outer = 0;
1260 
1261   // times of inner iteration
1262   int iter_inner = 0;
1263 
1264   // print out that we have started the iteration
1265   if (my_rank == 0)
1266     output << boost::format("iteration solving starts here(engine) \n") << std::endl << std::endl;
1267 
1268   while(true) {
1269 
1270     // smallest singular value
1271     double smallest_sin_value_outer = 0.0;
1272 
1273     // solve subspace eigenvalue problem on root process
1274     if ( my_rank == 0 ) {
1275       this -> solve_subspace_nonsymmetric(true);
1276     }
1277 
1278     // send resulting eigenvalues to all processes and record it as the new best estimate
1279     formic::mpi::bcast(&_energy_outer, 1);
1280     eval = _energy_outer;
1281 
1282     // check if the energy has an imaginary component and stop iteration when it does
1283     formic::mpi::bcast(&_eval_was_complex, 1);
1284     if ( _eval_was_complex ) {
1285       if ( my_rank == 0 )
1286         output << boost::format("spam iteration %4i: stopping due to imaginary component in energy") % iter_outer << std::endl;
1287       break;
1288     }
1289 
1290     // if energy change is unreasonable, stop iterating and set bad solve flag
1291     if (std::abs(_energy_outer - _init_energy) > _max_energy_change) {
1292       retval = false;
1293       if ( my_rank == 0 )
1294         output << boost::format("spam iteration %4i stopping due to too large eigenvalue change") % iter_outer << std::endl;
1295       break;
1296     }
1297 
1298     // if the overlap matrix becomes singular, stop iterating
1299     formic::mpi::bcast(&_smallest_sin_value_outer, 1);
1300     if (std::abs(_smallest_sin_value_outer) < _singular_value_threshold) {
1301       if (my_rank == 0)
1302         output << boost::format("spam iteration %4i stopping due to small subspace S singular value of %.2e") % iter_outer % _smallest_sin_value_outer << std::endl;
1303       break;
1304     }
1305 
1306     // construct new Krylov vector from subspace eigenvector
1307     _wv1.reset(_nfds);
1308     if (my_rank == 0) {
1309       _wv6.reset(_nfds);
1310       _wv6 = _kvecs * _sub_evec_outer;
1311       // get normalization factor
1312       const double temp_norm = std::sqrt(_wv6.norm2());
1313 
1314       // normalize this new vector
1315       _wv6 /= temp_norm;
1316 
1317       // add up linear combination of Hamiltonian and overlap products to make the new residual vector
1318       _wv1.reset(_nfds);
1319       _wv1 = _hvecs * _sub_evec_outer;
1320       _wv1 = _wv1 - _sub_eval_outer * _svecs * _sub_evec_outer;
1321     }
1322 
1323     // send this new vector to all processes
1324     formic::mpi::bcast(&_wv1.at(0), _wv1.size());
1325 
1326     // compute the residual norm and send it to all processes
1327     double current_outer_residual;
1328     current_outer_residual = _wv1.norm2();
1329     formic::mpi::bcast(&current_outer_residual, 1);
1330 
1331     // if this is the best residual, save it and save the new eigenvector estimate
1332     if (my_rank == 0 && current_outer_residual < _best_residual_outer) {
1333       _best_residual_outer = current_outer_residual;
1334 
1335       // get current eigenvector estimate, which corresponds to the set of independent variables
1336       formic::ColVec<double> ind_evecs;
1337       ind_evecs = _wv6.clone();
1338 
1339       // if our actual variables are dependent on the set of independent variables worded with here, expand the eigenvector into the full set of variables
1340       if ( _var_deps_use ) {
1341 
1342         // size the eigenvector correctly
1343         _evecs.reset( (1 + _dep_ptr -> n_tot()), 0.0);
1344         _evecs.at(0) = ind_evecs.at(0);
1345 
1346         // get some temporary vectors
1347         formic::ColVec<double> _evec_temp(_dep_ptr -> n_tot());
1348         formic::ColVec<double> _ind_temp(_dep_ptr -> n_ind());
1349         for (int i = 0; i < _ind_temp.size(); i++) {
1350           _ind_temp.at(i) = ind_evecs.at(i+1);
1351         }
1352         _dep_ptr -> expand_ind_to_all(&_ind_temp.at(0), &_evec_temp.at(0));
1353 
1354         for ( int i = 0; i < _evec_temp.size(); i++) {
1355           _evecs.at(i+1) = _evec_temp.at(i);
1356         }
1357       }
1358 
1359       // otherwise just copy the eigenvector into output since the independent and total variable sets are the same
1360       else {
1361         _evecs = ind_evecs.clone();
1362       }
1363     }
1364 
1365     // print iteration results
1366     if (my_rank == 0) {
1367 
1368       // if we are doing ground state calculation, then print out energy
1369       if ( _ground )
1370         output << boost::format("spam outer iteration %4i:   krylov dim = %3i   energy = %20.12f       residual = %.2e           smallest_sin_value = %.2e")
1371         % iter_outer
1372         % _nkry
1373         % _energy_outer
1374         % current_outer_residual
1375         % _smallest_sin_value_outer
1376         << std::endl;
1377 
1378       // if we are doing excited state calculation, then print out target function value
1379       else
1380         output << boost::format("spam outer iteration %4i:   krylov dim = %3i   tar_fn = %20.12f       residual = %.2e           smallest_sin_value = %.2e")
1381         % iter_outer
1382         % _nkry
1383         % _sub_eval_outer
1384         % current_outer_residual
1385         % _smallest_sin_value_outer
1386         << std::endl;
1387     }
1388 
1389     // check for convergence
1390     converged_outer = current_outer_residual < _residual_threshold;
1391 
1392     // if iteration has already converged, we exit iteration
1393     if ( converged_outer )
1394       break;
1395 
1396     // if iteration hasn't converged, we increment the iteration count by 1 and stop if maximum number of iterations has been reached
1397     if ( iter_outer ++ >= _n_max_iter)
1398       break;
1399 
1400     // now this is important, we add the new krylov basis vector to inner loop
1401     this -> add_krylov_vector_inner(_wv1);
1402 
1403     // now enter inner loop
1404     while (true) {
1405 
1406       // average of smallest singular value
1407       double smallest_sin_val_avg_inner = 0.0;
1408 
1409       // solve subspace eigenvalue problem on root process
1410       if ( my_rank == 0 ) {
1411         this -> solve_subspace_nonsymmetric(false);
1412       }
1413 
1414       // send resulting eigenvalues to all processes and record it as the new best estimate
1415       formic::mpi::bcast(&_energy_inner, 1);
1416 
1417       // check if the energy(or target function) has an imaginary component and stop iteration when it does
1418       formic::mpi::bcast(&_eval_was_complex, 1);
1419       if ( _eval_was_complex ) {
1420         if ( my_rank == 0 )
1421           output << boost::format("spam outer iteration %4i inner iteration %4i: stopping due to imaginary component in energy") % iter_outer % iter_inner << std::endl;
1422         break;
1423       }
1424 
1425       // if energy(or target function) change is unreasonable, stop iterating but don't set bag solve flag
1426       if (std::abs(_energy_inner - _init_energy) > _max_energy_change) {
1427         //retval = false;
1428         if ( my_rank == 0 )
1429           output << boost::format("spam outer iteration %4i inner iteration %4i: stopping due to too large eigenvalue change") % iter_outer % iter_inner << std::endl;
1430         break;
1431       }
1432 
1433       // if the overlap matrix becomes singular, stop iterating
1434       formic::mpi::bcast(&_smallest_sin_value_inner, 1);
1435       if (std::abs(_smallest_sin_value_inner) < _singular_value_threshold) {
1436         if (my_rank == 0)
1437           output << boost::format("spam outer iteration %4i inner iteration %4i: stopping due to too small S singular value of %.2e") % iter_outer % iter_inner % _smallest_sin_value_inner << std::endl;
1438         break;
1439       }
1440 
1441       // construct new krylov vector from subspace eigenvector
1442       if (my_rank == 0) {
1443         _wv6.reset(_nfds);
1444         _wv6 = _kvecs * _sub_evec_inner;
1445         // get normalization factor
1446         const double temp_norm = std::sqrt(_wv6.norm2());
1447 
1448         // normalize this new vector
1449         _wv6 /= temp_norm;
1450 
1451         // add up linear combination of Hamiltonian and overlap products to make the new residual vector
1452         _wv1.reset(_nfds);
1453         _wv1 = _hhvecs * _sub_evec_inner;
1454         _wv1 = _wv1 - _sub_eval_inner * _hsvecs * _sub_evec_inner;
1455       }
1456 
1457       // send this new vector to all processes
1458       formic::mpi::bcast(&_wv1.at(0), _wv1.size());
1459 
1460       // compute the residual norm and send it to all processes
1461       double current_inner_residual;
1462       current_inner_residual = _wv1.norm2();
1463       formic::mpi::bcast(&current_inner_residual, 1);
1464 
1465       // if this is the best residual, save it and save the new eigenvector estimate
1466       if (my_rank == 0 && current_inner_residual < _best_residual_inner)
1467         _best_residual_inner = current_inner_residual;
1468 
1469       // print iteration results if request
1470       if (my_rank == 0 && _inner_print) {
1471 
1472         // if we are doing ground state calculation, then print out energy
1473         if ( _ground )
1474           output << boost::format("spam outer iteration %4i inner iteration %4i: krylov dim = %3i  energy = %20.12f      residual = %.2e        smallest_sin_value = %.2e")
1475           % iter_outer
1476           % iter_inner
1477           % _kvecs.cols()
1478           % _energy_inner
1479           % current_inner_residual
1480           % _smallest_sin_value_inner
1481           << std::endl;
1482 
1483         // if we are doing excited state calculation, then print out target function value
1484         else
1485           output << boost::format("spam outer iteration %4i inner iteration %4i: krylov dim = %3i  energy = %20.12f      residual = %.2e        smallest_sin_value = %.2e")
1486           % iter_outer
1487           % iter_inner
1488           % _kvecs.cols()
1489           % _sub_eval_inner
1490           % current_inner_residual
1491           % _smallest_sin_value_inner
1492           << std::endl;
1493       }
1494 
1495       // check for convergence
1496       converged_inner = current_inner_residual < _residual_threshold;
1497 
1498       // if iteration has already converged, we exit iteration
1499       if ( converged_inner )
1500         break;
1501 
1502       // if iteration hasn't converged, we increment the iteration count by 1 and stop if maximum number of iterations has been reached
1503       if ( iter_inner ++ >= _inner_maxIter )
1504         break;
1505 
1506       // add this new krylov basis vector to inner loop
1507       this -> add_krylov_vector_inner(_wv1);
1508 
1509     // end of spam inner loop
1510     }
1511 
1512     // size the intermediate vectors correctly for non-root process
1513     if ( my_rank != 0 )
1514       _kvecs_about_to_add.reset(_nfds, _nkry - _nkry_full);
1515 
1516     // broadcast intermediate vectors that will be added to full krylov space
1517     formic::mpi::bcast(&_kvecs_about_to_add.at(0, 0), _kvecs_about_to_add.size());
1518 
1519     // add these new krylov vectors to outer loop
1520     this -> add_krylov_vectors_outer(_kvecs_about_to_add);
1521 
1522     // clear these intermediate vectors
1523     _kvecs_about_to_add.reset(0, 0);
1524 
1525     // reset the number of inner iterations
1526     iter_inner = 0;
1527 
1528   // end of spam outer loop
1529   }
1530 
1531   // print iteration information
1532   if (converged_outer && my_rank == 0)
1533     output << boost::format("spam solver converged in %10i iterations") % iter_outer << std::endl << std::endl;
1534 
1535   else if (my_rank == 0)
1536     output << boost::format("spam solver did not converge after %10i iterations") % iter_outer << std::endl << std::endl;
1537 
1538   return retval;
1539 
1540 }
1541 
1542 ///////////////////////////////////////////////////////////////////////////////////////////////
1543 // \brief solves the eigenvalue problem
1544 //
1545 //
1546 //
1547 ///////////////////////////////////////////////////////////////////////////////////////////////
1548 
solve(double & eval,std::ostream & output)1549 bool cqmc::engine::SpamLMHD::solve(double & eval, std::ostream & output)
1550 {
1551   return this -> iterative_solve(eval, output);
1552 }
1553 
1554 ////////////////////////////////////////////////////////////////////////////////////
1555 // \brief updates hamiltonian * krylov vector and hamiltonian projection based on
1556 //        new shift
1557 //
1558 //
1559 ////////////////////////////////////////////////////////////////////////////////////
1560 
update_hvecs_sub(const double new_i_shift,const double new_s_shift)1561 void cqmc::engine::SpamLMHD::update_hvecs_sub(const double new_i_shift, const double new_s_shift)
1562 {
1563   int my_rank = formic::mpi::rank();
1564 
1565   // get the different between new shift and old shift
1566   const double diff_shift_i = new_i_shift - _hshift_i;
1567   const double diff_shift_s = new_s_shift - _hshift_s;
1568 
1569   if (my_rank == 0) {
1570     // update "identity shift" for the hamiltonian product
1571     for (int j = 0; j < _nkry; j ++) {
1572       for (int i = 1; i < _nfds; i ++) {
1573         _hvecs.at(i, j) += diff_shift_i * _kvecs.at(i, j);
1574         _thvecs.at(i, j) += diff_shift_i * _kvecs.at(i, j);
1575       }
1576     }
1577 
1578     // update "overlap shift" for the hamiltonian product
1579     for (int j = 1; j < _nkry; j++) {
1580       for (int i = 0; i < _nfds; i++) {
1581         _hvecs.at(i, j) += diff_shift_s * _svecs.at(i, j);
1582         _thvecs.at(i, j) += diff_shift_s * _svecs.at(i, j);
1583       }
1584     }
1585 
1586     _hhvecs = _hvecs;
1587 
1588     // update projection of hamiltonian matrix
1589     _subH = _kvecs.t() * _hvecs;
1590     _hy_subH = _subH.clone();
1591   }
1592 }
1593 
1594 ////////////////////////////////////////////////////////////////////////////////////
1595 // \brief reset the eigen solver
1596 //
1597 // \brief clear subspace projection of Hamiltonian and overlap matrix, clear Krylov
1598 //        subspace and action of Hamiltonian and overlap matrix
1599 ////////////////////////////////////////////////////////////////////////////////////
1600 
child_reset()1601 void cqmc::engine::SpamLMHD::child_reset()
1602 {
1603   // clear subspace projection of Hamiltonian and overlap matrix
1604   _subH.reset(0, 0);
1605   _hy_subH.reset(0, 0);
1606   _subS.reset(0, 0);
1607   _hy_subS.reset(0, 0);
1608 
1609   // clear Krylov subspace
1610   _nkry = 0;
1611   _nkry_full = 0;
1612   _kvecs.reset(0, 0);
1613   _kvecs_about_to_add.reset(0, 0);
1614 
1615   // clear Hamiltonian and overlap matrix's action on krylov subspace
1616   _hvecs.reset(0, 0);
1617   _thvecs.reset(0, 0);
1618   _ahvecs.reset(0, 0);
1619   _athvecs.reset(0, 0);
1620   _hhvecs.reset(0, 0);
1621 
1622   _svecs.reset(0, 0);
1623   _asvecs.reset(0, 0);
1624   _hsvecs.reset(0, 0);
1625 
1626   // clear eigenvector and wavefunction coefficients
1627   _sub_evec_outer.reset(0);
1628   _sub_evec_inner.reset(0);
1629   _evecs_inner.reset(0);
1630 
1631   // clear all values calculated from last solve
1632   _sub_eval_outer = 0.0;
1633   _sub_eval_inner = 0.0;
1634   _smallest_sin_value_outer = 1e10;
1635   _smallest_sin_value_inner = 1e10;
1636 
1637   // set the inner and outer energy to be initial energy
1638   _energy_inner = _init_energy;
1639   _energy_outer = _init_energy;
1640 }
1641 
1642 ///////////////////////////////////////////////////////////////////////////////////////////////
1643 // \brief converts eigenvectors into wave function coefficients
1644 //        solving this question Hc = Sd for d, S is the ground overlap matrix
1645 //
1646 //
1647 ///////////////////////////////////////////////////////////////////////////////////////////////
1648 
child_convert_to_wf_coeff()1649 void cqmc::engine::SpamLMHD::child_convert_to_wf_coeff()
1650 {
1651 
1652   return;
1653 
1654 }
1655