//
// BAGEL - Brilliantly Advanced General Electronic Structure Library
// Filename: spinfreebase.cc
// Copyright (C) 2014 Toru Shiozaki
//
// Author: Toru Shiozaki <shiozaki@northwestern.edu>
// Maintainer: Shiozaki group
//
// This file is part of the BAGEL package.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//

#include <bagel_config.h>
#ifdef COMPILE_SMITH

#include <numeric>
#include <src/smith/moint.h>
#include <src/smith/spinfreebase.h>
#include <src/smith/smith_util.h>
#include <src/mat1e/cap.h>

using namespace std;
using namespace bagel;
using namespace bagel::SMITH;

template<typename DataType>
SpinFreeMethod<DataType>::SpinFreeMethod(shared_ptr<const SMITH_Info<DataType>> inf) : info_(inf), info_orig_(info_) {
  static_assert(is_same<DataType,double>::value or is_same<DataType,complex<double>>::value,
                "illegal DataType for SpinFreeMethod");

  Timer timer;
  const int max = info_->maxtile();
  if (info_->ncore() > info_->nclosed())
    throw runtime_error("frozen core has been specified but there are not enough closed orbitals");

  const int ncore2 = info_->ncore()*(is_same<DataType,double>::value ? 1 : 2);

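  // Set up the orbital index ranges (tiles of at most "max" orbitals); the ncore
  // frozen orbitals are excluded from closed_ via the offset arguments. In the
  // relativistic (complex) case each range is doubled by merging a second block
  // that holds the Kramers partners.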
  closed_ = IndexRange(info_->nclosed()-info_->ncore(), max, 0, info_->ncore());
  if (is_same<DataType,complex<double>>::value)
    closed_.merge(IndexRange(info_->nclosed()-info_->ncore(), max, closed_.nblock(), ncore2+closed_.size(), info_->ncore()));

  active_ = IndexRange(info_->nact(), min(10,max), closed_.nblock(), ncore2+closed_.size());
  if (is_same<DataType,complex<double>>::value)
    active_.merge(IndexRange(info_->nact(), min(10,max), closed_.nblock()+active_.nblock(), ncore2+closed_.size()+active_.size(),
                                                                                            ncore2+closed_.size()));

  virt_ = IndexRange(info_->nvirt(), max, closed_.nblock()+active_.nblock(), ncore2+closed_.size()+active_.size());
  if (is_same<DataType,complex<double>>::value)
    virt_.merge(IndexRange(info_->nvirt(), max, closed_.nblock()+active_.nblock()+virt_.nblock(), ncore2+closed_.size()+active_.size()+virt_.size(),
                                                                                                  ncore2+closed_.size()+active_.size()));

  all_    = closed_; all_.merge(active_); all_.merge(virt_);

  rclosed_ = make_shared<const IndexRange>(closed_);
  ractive_ = make_shared<const IndexRange>(active_);
  rvirt_   = make_shared<const IndexRange>(virt_);

  // f1 tensor.
  {
    MOFock<DataType> fock(info_, {all_, all_});
    f1_ = fock.tensor();
    h1_ = fock.h1();
    core_energy_ = fock.core_energy();
    // canonical orbitals within closed and virtual subspaces
    coeff_ = fock.coeff();
  }

  // v2 tensor.
  {
    IndexRange occ(closed_);  occ.merge(active_);
    IndexRange virt(active_); virt.merge(virt_);

    // in the case of MRCI, we need to include all sectors
    if (to_upper(info_->method()) == "MRCI") {
      occ = all_;
      virt = all_;
    }
    K2ext<DataType> v2k(info_, coeff_, {occ, virt, occ, virt});
    v2_ = v2k.tensor();
  }
  timer.tick_print("MO integral evaluation");

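  // Active-space Fock matrix: gather the active-active blocks of f1_ into a dense
  // matrix (block offsets are shifted by the number of closed orbitals), then conjugate.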
  auto fockact = make_shared<MatType>(active_.size(), active_.size());
  const int nclosed2 = info_->nclosed() * (is_same<DataType,double>::value ? 1 : 2);
  for (auto& i1 : active_)
    for (auto& i0 : active_)
      fockact->copy_block(i0.offset()-nclosed2, i1.offset()-nclosed2, i0.size(), i1.size(), f1_->get_block(i0, i1).get());
  fockact_ = fockact->get_conjg();

  // set Eref
  const int nstates = info_->nact() ? info_->ciwfn()->nstates() : 1;
  eref_ = make_shared<MatType>(nstates, nstates);
  if (info_->nact())
    for (int i = 0; i != nstates; ++i)
      eref_->element(i, i) = info_->ciwfn()->energy(i);
  else
    eref_->element(0, 0) = info_->ref()->energy(0);

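  // XMS rotation: mix the reference states so that the state-averaged Fock matrix
  // becomes diagonal in the model space, and transform eref_ accordingly.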
  if (nstates > 1 && info_->do_xms()) {
    rotate_xms();
    eref_->print("Reference energies in XMS basis");
  }

  // Reference properties if requested
  reference_prop();

  // rdms.
  if (info_->ciwfn()) {
    feed_rdm_denom();
    timer.tick_print("RDM + denominator evaluation");
  }

  // set e0all_
  compute_e0();

  mpi__->barrier();
}


template<>
void SpinFreeMethod<double>::rotate_xms() {
  assert(fockact_);
  const int nstates = info_->ciwfn()->nstates();
  Matrix fmn(nstates, nstates);

  for (int ist = 0; ist != nstates; ++ist) {
    for (int jst = 0; jst <= ist; ++jst) {
      // first compute 1RDM
      shared_ptr<const RDM<1>> rdm1;
      tie(rdm1, ignore) = info_->rdm12(jst, ist);
      // then assign the dot product: fmn = f_ij rdm1
      fmn(ist, jst) = blas::dot_product(fockact_->data(), fockact_->size(), rdm1->data());
      fmn(jst, ist) = fmn(ist, jst);
    }
  }

  fmn.print("State-averaged Fock matrix over basis states");

  // diagonalize fmn
  VectorB eig(nstates);
  fmn.diagonalize(eig);

  fmn.print("Extended multi-state CASPT2 (XMS-CASPT2) rotation matrix");

  // construct Reference
  shared_ptr<const CIWfn> new_ciwfn = rotate_ciwfn(info_->ciwfn(), fmn);
  auto new_ref = make_shared<Reference>(info_->geom(), make_shared<Coeff>(*info_->coeff()), info_->nclosed(), info_->nact(),
                                        info_->nvirt() + info_->nfrozenvirt(), info_->ref()->energy(), info_->ref()->rdm1(), info_->ref()->rdm2(),
                                        info_->ref()->rdm1_av(), info_->ref()->rdm2_av(), new_ciwfn);

  // construct SMITH_info
  info_ = make_shared<SMITH_Info<double>>(new_ref, info_);

  // update eref_
  eref_ = make_shared<Matrix>(fmn % (*eref_) * fmn);
  xmsmat_ = make_shared<Matrix>(move(fmn));
}


template<>
void SpinFreeMethod<complex<double>>::rotate_xms() {
  assert(fockact_);
  const int nstates = info_->ciwfn()->nstates();
  ZMatrix fmn(nstates, nstates);

  for (int ist = 0; ist != nstates; ++ist) {
    for (int jst = 0; jst <= ist; ++jst) {
      // first compute 1RDM
      shared_ptr<const Kramers<2,ZRDM<1>>> krdm1;
      tie(krdm1, ignore) = info_->rdm12(jst, ist);
      shared_ptr<ZRDM<1>> rdm1 = expand_kramers(krdm1, krdm1->begin()->second->norb());
      assert(fockact_->size() == rdm1->size());
      // then assign the dot product: fmn = f_ij rdm1
      fmn(jst, ist) = blas::dot_product_noconj(fockact_->data(), fockact_->size(), rdm1->data());
      fmn(ist, jst) = std::conj(fmn(jst, ist));
#ifndef NDEBUG
      tie(krdm1, ignore) = info_->rdm12(ist, jst);
      rdm1 = expand_kramers(krdm1, krdm1->begin()->second->norb());
      assert(std::abs(fmn(ist, jst) - blas::dot_product_noconj(fockact_->data(), fockact_->size(), rdm1->data())) < 1.0e-6);
#endif
    }
  }

  fmn.print("State-averaged Fock matrix over basis states");
  // diagonalize fmn
  VectorB eig(nstates);
  fmn.diagonalize(eig);

  fmn.print("Extended multi-state CASPT2 (XMS-CASPT2) rotation matrix");

  // construct Reference
  shared_ptr<const RelCIWfn> new_ciwfn = rotate_ciwfn(info_->ciwfn(), fmn);
  auto relref = dynamic_pointer_cast<const RelReference>(info_->ref());
  auto relcoeff = dynamic_pointer_cast<const ZCoeff_Block>(info_->coeff());
  assert(relref && relcoeff);
  auto new_ref = make_shared<RelReference>(info_->geom(), relcoeff->striped_format(), relref->energy(),
                                           relref->nneg(), info_->nclosed(), info_->nact(), info_->nvirt() + info_->nfrozenvirt(),
                                           info_->gaunt(), info_->breit(), /*kramers*/true,
                                           relref->rdm1_av(), relref->rdm2_av(), new_ciwfn);

  // construct SMITH_info
  info_ = make_shared<SMITH_Info<complex<double>>>(new_ref, info_);

  // update eref_
  eref_ = make_shared<ZMatrix>(fmn % (*eref_) * fmn);
  xmsmat_ = make_shared<ZMatrix>(move(fmn));
}


template<>
void SpinFreeMethod<double>::reference_prop() const {
  const int nstates = info_->ciwfn()->nstates();
  const array<double,3> cap = info_->geom()->cap();

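  // properties are evaluated only when a complex absorbing potential (CAP) has been
  // specified; the geometry evidently signals its absence with NaN parameters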
  const bool dothis = !::isnan(cap[0]);
  if (dothis) {
    auto acoeff = info_->coeff()->slice(info_->nclosed(), info_->nocc());
    auto ccoeff = info_->coeff()->slice(0, info_->nclosed());

    map<pair<int,int>, shared_ptr<const RDM<1>>> rdmmap;
    for (int i = 0; i != nstates; ++i)
      for (int j = 0; j <= i; ++j) {
        auto rdm = info_->rdm12(j, i);
        rdmmap.emplace(make_pair(j,i), get<0>(rdm));
      }

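    // compute_local builds the matrix of a one-electron operator over the reference
    // states: a doubly-occupied closed-shell contribution on the diagonal plus the
    // active part contracted with the (transition) 1RDMs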
    auto compute_local = [&,this](const Matrix& mat1e, const string title) {
      Matrix propmat(nstates, nstates);
      if (info_->nclosed()) {
        auto cvec = (ccoeff % mat1e * ccoeff).diag();
        const double csum = accumulate(cvec.begin(), cvec.end(), 0.0);
        for (int i = 0; i != nstates; ++i)
          propmat(i, i) += csum * 2.0;
      }
      auto amat = acoeff % mat1e * acoeff;
      // add the active contribution without clobbering the closed-shell part on the diagonal
      for (int i = 0; i != nstates; ++i)
        for (int j = 0; j <= i; ++j) {
          propmat(i, j) += blas::dot_product(rdmmap.at(make_pair(j,i))->data(), info_->nact()*info_->nact(), amat.data());
          propmat(j, i) = propmat(i, j);
        }
      propmat.print(title);
    };

    // here comes a list of properties to be printed
    CAP capmat(info_->geom());
    capmat.compute();
    compute_local(capmat, "CAP");
  }
}


template<>
void SpinFreeMethod<complex<double>>::reference_prop() const {
  // not implemented yet
}

template<>
void SpinFreeMethod<double>::feed_rdm_denom() {
  const int nclo = info_->nclosed();
  const int nstates = info_->ciwfn()->nstates();
  rdm0all_ = make_shared<Vec<Tensor_<double>>>();
  rdm1all_ = make_shared<Vec<Tensor_<double>>>();
  rdm2all_ = make_shared<Vec<Tensor_<double>>>();
  rdm3all_ = make_shared<Vec<Tensor_<double>>>();
  rdm4fall_ = make_shared<Vec<Tensor_<double>>>();
  if (info_->rdm4_eval()) {
    rdm4all_ = make_shared<Vec<Tensor_<double>>>();
    cout << "    * Calculating and saving rdm4" << endl << endl;
  } else {
    cout << "    * Calculating and saving rdm4f" << endl << endl;
  }

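  // denominator setup: with the SSSR ansatz only state-diagonal (jst == ist) RDMs
  // are appended below, with MSMR all state pairs contribute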
  assert(fockact_);
  shared_ptr<Denom<double>> denom;
  if (info_->sssr())
    denom = make_shared<Denom_SSSR<double>>(fockact_, nstates, info_->thresh_overlap());
  else
    denom = make_shared<Denom_MSMR<double>>(fockact_, nstates, info_->thresh_overlap());

  // TODO this can be reduced to half by bra-ket symmetry
  for (int ist = 0; ist != nstates; ++ist) {
    for (int jst = 0; jst != nstates; ++jst) {

      shared_ptr<const RDM<1>> rdm1;
      shared_ptr<const RDM<2>> rdm2;
      shared_ptr<const RDM<3>> rdm3;
      shared_ptr<RDM<3>> rdm4f;
      shared_ptr<const RDM<4>> rdm4;

      tie(rdm1, rdm2) = info_->rdm12(jst, ist);
      // the CASPT2 energy needs only rdm4f; others (including the CASPT2 gradient) require the full rdm4
      if (info_->rdm4_eval()) {
        tie(rdm3, rdm4) = info_->rdm34(jst, ist);
        rdm4f = info_->rdm4f_contract(rdm3, rdm4, fockact_);
      } else {
        tie(rdm3, rdm4f) = info_->rdm34f(jst, ist, fockact_);
      }
      // append denominator
      if (!info_->sssr() || jst == ist)
        denom->append(jst, ist, rdm1, rdm2, rdm3, rdm4f);

      // then scale rdm4f to compensate for the nblock^2-fold accumulation over active f1 blocks downstream
      const double rdm4factor = 1.0 / static_cast<double>(active_.nblock() * active_.nblock());
      rdm4f->scale(rdm4factor);

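      // rank-0 "RDM": the overlap <jst|ist>, i.e. a Kronecker delta for the
      // orthonormal reference states, stored as a scalar tensor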
      unique_ptr<double[]> data0(new double[1]);
      data0[0] = jst == ist ? 1.0 : 0.0;
      auto rdm0t = make_shared<Tensor_<double>>(vector<IndexRange>());
      rdm0t->allocate();
      if (rdm0t->is_local())
        rdm0t->put_block(data0);
      auto rdm1t = fill_block<2,double>(rdm1, vector<int>(2,nclo), vector<IndexRange>(2,active_));
      auto rdm2t = fill_block<4,double>(rdm2, vector<int>(4,nclo), vector<IndexRange>(4,active_));
      auto rdm3t = fill_block<6,double>(rdm3, vector<int>(6,nclo), vector<IndexRange>(6,active_));
      auto rdm4ft = fill_block<6,double>(rdm4f, vector<int>(6,nclo), vector<IndexRange>(6,active_));

      rdm0all_->emplace(jst, ist, rdm0t);
      rdm1all_->emplace(jst, ist, rdm1t);
      rdm2all_->emplace(jst, ist, rdm2t);
      rdm3all_->emplace(jst, ist, rdm3t);
      rdm4fall_->emplace(jst, ist, rdm4ft);
      if (info_->rdm4_eval()) {
        auto rdm4t = fill_block<8,double>(rdm4, vector<int>(8,nclo), vector<IndexRange>(8,active_));
        rdm4all_->emplace(jst, ist, rdm4t);
      }
    }
  }
  denom->compute();
  denom_ = denom;
}


template<>
void SpinFreeMethod<complex<double>>::feed_rdm_denom() {
  const int nclo = info_->nclosed();
  const int nstates = info_->ciwfn()->nstates();
  rdm0all_ = make_shared<Vec<Tensor_<complex<double>>>>();
  rdm1all_ = make_shared<Vec<Tensor_<complex<double>>>>();
  rdm2all_ = make_shared<Vec<Tensor_<complex<double>>>>();
  rdm3all_ = make_shared<Vec<Tensor_<complex<double>>>>();
  rdm4fall_ = make_shared<Vec<Tensor_<complex<double>>>>();
  if (info_->rdm4_eval()) {
    rdm4all_ = make_shared<Vec<Tensor_<complex<double>>>>();
    cout << "    * Calculating and saving rdm4" << endl << endl;
  } else {
    cout << "    * Calculating and saving rdm4f" << endl << endl;
  }

  assert(fockact_);
  shared_ptr<Denom<complex<double>>> denom;
  if (info_->sssr())
    denom = make_shared<Denom_SSSR<complex<double>>>(fockact_, nstates, info_->thresh_overlap());
  else
    denom = make_shared<Denom_MSMR<complex<double>>>(fockact_, nstates, info_->thresh_overlap());

  // TODO this can be reduced to half by bra-ket symmetry
  for (int ist = 0; ist != nstates; ++ist) {
    for (int jst = 0; jst != nstates; ++jst) {

      shared_ptr<const Kramers<2,ZRDM<1>>> rdm1;
      shared_ptr<const Kramers<4,ZRDM<2>>> rdm2;
      shared_ptr<const Kramers<6,ZRDM<3>>> rdm3;
      shared_ptr<Kramers<6,ZRDM<3>>> rdm4f;
      shared_ptr<const Kramers<8,ZRDM<4>>> rdm4;
      tie(rdm1, rdm2) = info_->rdm12(jst, ist);
      if (info_->rdm4_eval()) {
        tie(rdm3, rdm4) = info_->rdm34(jst, ist);
        rdm4f = info_->rdm4f_contract(rdm3, rdm4, fockact_);
      } else {
        tie(rdm3, rdm4f) = info_->rdm34f(jst, ist, fockact_);
      }
      shared_ptr<const Kramers<6,ZRDM<3>>> rdm4fe = rdm4f->copy();

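      // expand the Kramers-compressed RDMs into full spin-orbital storage;
      // the denominator code works with the expanded arrays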
      auto rdm1ex  = expand_kramers(rdm1, info_->nact());
      auto rdm2ex  = expand_kramers(rdm2, info_->nact());
      auto rdm3ex  = expand_kramers(rdm3, info_->nact());
      auto rdm4fex = expand_kramers(rdm4fe, info_->nact());
      if (!info_->sssr() || jst == ist) {
        denom->append(jst, ist, rdm1ex, rdm2ex, rdm3ex, rdm4fex);
      }

      auto rdm0t = make_shared<Tensor_<complex<double>>>(vector<IndexRange>());
      unique_ptr<complex<double>[]> data0(new complex<double>[1]);
      data0[0] = jst == ist ? 1.0 : 0.0;
      rdm0t->allocate();
      if (rdm0t->is_local())
        rdm0t->put_block(data0);

      const int n = info_->nact();
//#define ALL_KRAMERS
#ifndef ALL_KRAMERS
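      // the sort_indices calls below swap each bra-ket index pair, which appears to
      // match the reversed index ordering used by the SMITH-generated code (cf. set_rdm)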
      auto rdm1x = rdm1ex->clone();
      auto rdm2x = rdm2ex->clone();
      auto rdm3x = rdm3ex->clone();
      auto rdm4fx = rdm4fex->clone();
      const double rdm4factor = 1.0 / static_cast<double>(active_.nblock() * active_.nblock());
      rdm4fex->scale(rdm4factor);
      sort_indices<1,0,0,1,1,1>        (rdm1ex->data(), rdm1x->data(), 2*n, 2*n);
      sort_indices<1,0,3,2,0,1,1,1>    (rdm2ex->data(), rdm2x->data(), 2*n, 2*n, 2*n, 2*n);
      sort_indices<1,0,3,2,5,4,0,1,1,1>(rdm3ex->data(), rdm3x->data(), 2*n, 2*n, 2*n, 2*n, 2*n, 2*n);
      sort_indices<1,0,3,2,5,4,0,1,1,1>(rdm4fex->data(), rdm4fx->data(), 2*n, 2*n, 2*n, 2*n, 2*n, 2*n);
      auto rdm1t = fill_block<2,complex<double>>(rdm1x, vector<int>(2,nclo*2), vector<IndexRange>(2,active_));
      auto rdm2t = fill_block<4,complex<double>>(rdm2x, vector<int>(4,nclo*2), vector<IndexRange>(4,active_));
      auto rdm3t = fill_block<6,complex<double>>(rdm3x, vector<int>(6,nclo*2), vector<IndexRange>(6,active_));
      auto rdm4ft = fill_block<6,complex<double>>(rdm4fx, vector<int>(6,nclo*2), vector<IndexRange>(6,active_));
#else
      shared_ptr<Kramers<2,ZRDM<1>>> rdm1x = rdm1->copy();
      shared_ptr<Kramers<4,ZRDM<2>>> rdm2x = rdm2->copy();
      shared_ptr<Kramers<6,ZRDM<3>>> rdm3x = rdm3->copy();
      shared_ptr<Kramers<6,ZRDM<3>>> rdm4fx = rdm4f->copy();
      auto j1 = rdm1x->begin();
      auto j2 = rdm2x->begin();
      auto j3 = rdm3x->begin();
      auto j4 = rdm4fx->begin();
      const double rdm4factor = 1.0 / static_cast<double>(active_.nblock() * active_.nblock());
      for (auto& i : *rdm1) sort_indices<1,0,0,1,1,1>        (i.second->data(), (*j1++).second->data(), n, n);
      for (auto& i : *rdm2) sort_indices<1,0,3,2,0,1,1,1>    (i.second->data(), (*j2++).second->data(), n, n, n, n);
      for (auto& i : *rdm3) sort_indices<1,0,3,2,5,4,0,1,1,1>(i.second->data(), (*j3++).second->data(), n, n, n, n, n, n);
      for (auto& i : *rdm4f) {
        // sort into the current target block, then scale it before advancing the iterator
        sort_indices<1,0,3,2,5,4,0,1,1,1>(i.second->data(), (*j4).second->data(), n, n, n, n, n, n);
        ((*j4++).second)->scale(rdm4factor);
      }
      auto rdm1t = fill_block<2,complex<double>,ZRDM<1>>(rdm1x, vector<int>(2,nclo*2), vector<IndexRange>(2,active_));
      auto rdm2t = fill_block<4,complex<double>,ZRDM<2>>(rdm2x, vector<int>(4,nclo*2), vector<IndexRange>(4,active_));
      auto rdm3t = fill_block<6,complex<double>,ZRDM<3>>(rdm3x, vector<int>(6,nclo*2), vector<IndexRange>(6,active_));
      auto rdm4ft = fill_block<6,complex<double>,ZRDM<3>>(rdm4fx, vector<int>(6,nclo*2), vector<IndexRange>(6,active_));
#endif
      rdm0all_->emplace(ist, jst, rdm0t);
      rdm1all_->emplace(ist, jst, rdm1t);
      rdm2all_->emplace(ist, jst, rdm2t);
      rdm3all_->emplace(ist, jst, rdm3t);
      rdm4fall_->emplace(ist, jst, rdm4ft);
      if (info_->rdm4_eval()) {
//#define RDM4_KRAMERS
#ifdef RDM4_KRAMERS
        auto rdm4x = make_shared<Kramers<8,ZRDM<4>>>();
        rdm4x->set_perm(rdm4->perm());
        for (auto& i : *rdm4) {
          shared_ptr<ZRDM<4>> data = i.second->clone();
          sort_indices<1,0,3,2,5,4,7,6,0,1,1,1>(i.second->data(), data->data(), n, n, n, n, n, n, n, n);
          rdm4x->emplace(i.first.perm({1,0,3,2,5,4,7,6}), data);
        }
        auto rdm4t = fill_block<8,complex<double>,ZRDM<4>>(rdm4x, vector<int>(8,nclo*2), vector<IndexRange>(8,active_));
#else
        auto rdm4ex  = expand_kramers(rdm4, info_->nact());
        auto rdm4x = rdm4ex->clone();
        sort_indices<1,0,3,2,5,4,7,6,0,1,1,1>(rdm4ex->data(), rdm4x->data(), 2*n, 2*n, 2*n, 2*n, 2*n, 2*n, 2*n, 2*n);
        auto rdm4t = fill_block<8,complex<double>>(rdm4x, vector<int>(8,nclo*2), vector<IndexRange>(8,active_));
#endif
        rdm4all_->emplace(ist, jst, rdm4t);
      }
    }
  }
  denom->compute();
  denom_ = denom;
}

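// precomputes the Fock-weighted 2RDM CI derivative over the full determinant space,
// so that feed_rdm_deriv below can reuse it per CI block (cf. the "Recycle" note there)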
template<>
shared_ptr<Matrix> SpinFreeMethod<double>::feed_rdm_2fderiv(shared_ptr<const SMITH_Info<double>> info, shared_ptr<const Matrix> fockact, const int istate) {
  const int nact = info->nact();
  const int ndet = info->ref()->civectors()->data(istate)->size();
  auto rdm1d_full = make_shared<Matrix>(ndet, nact * nact);
  {
    shared_ptr<Dvec> rdm1a = info->ref()->rdm1deriv(istate);
    for (int i = 0; i != nact * nact; ++i)
      copy_n(rdm1a->data(i)->data(), ndet, rdm1d_full->element_ptr(0, i));
  }
  shared_ptr<Matrix> rdm2fd = info->ref()->rdm2fderiv(istate, fockact, rdm1d_full);

  return rdm2fd;
}


template<>
tuple<shared_ptr<VectorB>, shared_ptr<Matrix>, shared_ptr<Matrix>, shared_ptr<Matrix>>
  SpinFreeMethod<double>::feed_rdm_deriv(shared_ptr<const SMITH_Info<double>> info,
      shared_ptr<const Matrix> fockact, const int istate, const size_t offset, const size_t size, shared_ptr<const Matrix> rdm2fd_in) {

  const int nact = info->nact();
  auto rdm0d = make_shared<VectorB>(size);
  copy_n(info->ref()->civectors()->data(istate)->data() + offset, size, rdm0d->data());

  const int ndet = info->ref()->civectors()->data(istate)->size();
  auto rdm1d_full = make_shared<Matrix>(ndet, nact * nact);
  auto rdm1d = make_shared<Matrix>(size, nact * nact);
  {
    shared_ptr<Dvec> rdm1a = info->ref()->rdm1deriv(istate);
    auto fill2 = [&offset, &size](shared_ptr<const Dvec> in, shared_ptr<Matrix> out) {
      assert(out->mdim() == in->ij());
      for (int i = 0; i != in->ij(); ++i)
        copy_n(in->data(i)->data() + offset, size, out->element_ptr(0, i));
    };
    fill2(rdm1a, rdm1d);

    for (int i = 0; i != nact * nact; ++i)
      copy_n(rdm1a->data(i)->data(), ndet, rdm1d_full->element_ptr(0, i));
  }

  shared_ptr<Matrix> rdm2d;
  shared_ptr<Matrix> rdm3fd;

  // Recycle [J|k+l|0] = <J|m+k+ln|0> f_mn.
  tie(rdm2d, rdm3fd) = info->ref()->rdm3deriv(istate, fockact, offset, size, rdm1d_full, rdm2fd_in);

  return tie(rdm0d, rdm1d, rdm2d, rdm3fd);
}


template<>
std::shared_ptr<CIWfn> SpinFreeMethod<double>::rotate_ciwfn(std::shared_ptr<const CIWfn> input, const Matrix& rotation) const {
  // construct CIWfn
  const int nstates = input->nstates();
  assert(rotation.ndim() == rotation.mdim() && rotation.ndim() == nstates);
  shared_ptr<const Dvec> dvec = input->civectors();
  shared_ptr<Dvec> new_dvec = dvec->clone();
  vector<shared_ptr<Civector<double>>> civecs = dvec->dvec();
  vector<shared_ptr<Civector<double>>> new_civecs = new_dvec->dvec();

  for (int jst = 0; jst != nstates; ++jst) {
    for (int ist = 0; ist != nstates; ++ist)
      new_civecs[jst]->ax_plus_y(rotation(ist,jst), civecs[ist]);
  }

  vector<double> energies(nstates);
  for (int i = 0; i != nstates; ++i)
    energies[i] = input->energy(i);
  return make_shared<CIWfn>(input->geom(), input->ncore(), input->nact(), nstates, energies, new_dvec, input->det());
}


template<>
std::shared_ptr<RelCIWfn> SpinFreeMethod<std::complex<double>>::rotate_ciwfn(std::shared_ptr<const RelCIWfn> input, const ZMatrix& rotation) const {
  // construct CIWfn
  // TODO: Verify this chunk of code carefully
  const int nstates = input->nstates();
  assert(rotation.ndim() == rotation.mdim() && rotation.ndim() == nstates);
  shared_ptr<const RelZDvec> dvec = input->civectors();
  shared_ptr<RelZDvec> new_dvec = dvec->clone();

  map<pair<int, int>, shared_ptr<Dvector<complex<double>>>> dvecs = dvec->dvecs();
  map<pair<int, int>, shared_ptr<Dvector<complex<double>>>> new_dvecs = new_dvec->dvecs();

  for (auto& i: dvecs) {
    vector<shared_ptr<Civector<complex<double>>>> civecs = dvecs.at(i.first)->dvec();
    vector<shared_ptr<Civector<complex<double>>>> new_civecs = new_dvecs.at(i.first)->dvec();
    for (int jst = 0; jst != nstates; ++jst)
      for (int ist = 0; ist != nstates; ++ist)
        new_civecs[jst]->ax_plus_y(rotation(ist,jst), civecs[ist]);
  }

  vector<double> energies(nstates);
  for (int i = 0; i != nstates; ++i)
    energies[i] = input->energy(i);
  return make_shared<RelCIWfn>(input->geom(), input->ncore(), input->nact(), nstates, energies, new_dvec, input->det());
}


template<>
shared_ptr<ZMatrix> SpinFreeMethod<complex<double>>::feed_rdm_2fderiv(shared_ptr<const SMITH_Info<complex<double>>> info, shared_ptr<const ZMatrix> fockact, const int istate) {
  throw logic_error("SpinFreeMethod::feed_rdm_2fderiv is not implemented for relativistic cases.");
  shared_ptr<ZMatrix> dum;
  return dum;
}


template<>
tuple<shared_ptr<ZVectorB>, shared_ptr<ZMatrix>, shared_ptr<ZMatrix>, shared_ptr<ZMatrix>>
  SpinFreeMethod<complex<double>>::feed_rdm_deriv(shared_ptr<const SMITH_Info<complex<double>>> info,
          shared_ptr<const ZMatrix> fockact, const int istate, const size_t offset, const size_t size, shared_ptr<const ZMatrix> rdm2fd_in) {
  throw logic_error("SpinFreeMethod::feed_rdm_deriv is not implemented for relativistic cases.");
  shared_ptr<ZVectorB> du;
  shared_ptr<ZMatrix> dum;
  return tie(du, dum, dum, dum);
}


template<typename DataType>
void SpinFreeMethod<DataType>::set_rdm(const int ist, const int jst) {
  if (info_->nact()) {
    // ist is bra, jst is ket.
    // CAREFUL! the storage order below follows SMITH's convention (i.e., the indices are reversed)
    rdm0_ = rdm0all_->at(jst, ist);
    rdm1_ = rdm1all_->at(jst, ist);
    rdm2_ = rdm2all_->at(jst, ist);
    rdm3_ = rdm3all_->at(jst, ist);
    rdm4f_ = rdm4fall_->at(jst, ist);
    if (info_->rdm4_eval())
      rdm4_ = rdm4all_->at(jst, ist);

    // ensure that get_block calls are done after RDMs are set in every node
    mpi__->barrier();
  }
}


template<typename DataType>
void SpinFreeMethod<DataType>::print_iteration() {
  cout << "      ---- iteration ----" << endl << endl;
}


template<typename DataType>
void SpinFreeMethod<DataType>::print_iteration(const int i, const double en, const double err, const double tim, const int ist) {
  cout << "     " << setw(4) << i;
  if (ist >= 0)
    cout << setw(4) << ist;
  cout << setw(15) << fixed << setprecision(8) << en << setw(15) << fixed << setprecision(8) << err
                                                     << setw(10) << fixed << setprecision(2) << tim << endl;
}


template<typename DataType>
void SpinFreeMethod<DataType>::print_iteration(const bool noconv) {
  cout << endl << "      -------------------" << endl;
  if (noconv) cout << "      *** Convergence not reached ***" << endl;
  cout << endl;
}


template<typename DataType>
void SpinFreeMethod<DataType>::compute_e0() {
  assert(!!f1_);
  const size_t nstates = info_->nact() ? info_->ciwfn()->nstates() : 1;
  e0all_.resize(nstates);
  if (info_->nact()) {
    for (int ist = 0; ist != nstates; ++ist) {
      DataType sum = 0.0;
      set_rdm(ist, ist);
      assert(!!rdm1_);
      for (auto& i1 : active_) {
        for (auto& i0 : active_) {
          if (f1_->is_local(i0, i1)) {
            const size_t size = i0.size() * i1.size();
            unique_ptr<DataType[]> fdata = f1_->get_block(i0, i1);
            unique_ptr<DataType[]> rdata = rdm1_->get_block(i0, i1);
            sum += blas::dot_product_noconj(fdata.get(), size, rdata.get());
          }
        }
      }
      mpi__->allreduce(&sum, 1);
      e0all_[ist] = detail::real(sum);
    }
  } else {
    e0all_[0] = 0.0;
  }
  // printout
  cout << endl;
  for (int ist = 0; ist != nstates; ++ist)
    cout << "    * Zeroth order energy : state " << setw(2) << ist << setw(20) << setprecision(10) << e0all_[ist] << endl;
  cout << endl;
}


// helper that applies func to every combination of index blocks appearing in the
// amplitude and residual tensors; it compresses the loops that would otherwise be
// repeated in init_amplitude, init_residual, and the dot products below
template<typename DataType>
void SpinFreeMethod<DataType>::loop_over(function<void(const Index&, const Index&, const Index&, const Index&)> func) const {
  for (auto& i3 : virt_)
    for (auto& i2 : closed_)
      for (auto& i1 : virt_)
        for (auto& i0 : closed_)
          func(i0, i1, i2, i3);
  for (auto& i2 : active_)
    for (auto& i0 : active_)
      for (auto& i3 : virt_)
        for (auto& i1 : virt_)
          func(i0, i1, i2, i3);
  for (auto& i0 : active_)
    for (auto& i3 : virt_)
      for (auto& i2 : closed_)
        for (auto& i1 : virt_)
          func(i0, i1, i2, i3);
  for (auto& i3 : active_)
    for (auto& i2 : closed_)
      for (auto& i1 : virt_)
        for (auto& i0 : closed_)
          func(i0, i1, i2, i3);
  for (auto& i3 : active_)
    for (auto& i1 : active_)
      for (auto& i2 : closed_)
        for (auto& i0 : closed_)
          func(i0, i1, i2, i3);
  for (auto& i3 : active_)
    for (auto& i2 : active_)
      for (auto& i1 : virt_)
        for (auto& i0 : closed_) {
          func(i0, i1, i2, i3);
          func(i2, i1, i0, i3);
        }
  for (auto& i3 : active_)
    for (auto& i2 : active_)
      for (auto& i0 : active_)
        for (auto& i1 : virt_)
          func(i0, i1, i2, i3);
  for (auto& i3 : active_)
    for (auto& i1 : active_)
      for (auto& i0 : active_)
        for (auto& i2 : closed_)
          func(i0, i1, i2, i3);
}


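// the amplitude and residual tensors are block-sparse: only the blocks visited by
// loop_over are hashed into "sparse" and allocated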
template<typename DataType>
shared_ptr<Tensor_<DataType>> SpinFreeMethod<DataType>::init_amplitude() const {
  unordered_set<size_t> sparse;
  auto put = [&sparse](const Index& i0, const Index& i1, const Index& i2, const Index& i3) {
    sparse.insert(generate_hash_key(i0, i1, i2, i3));
  };
  loop_over(put);
  return make_shared<Tensor_<DataType>>(v2_->indexrange(), /*kramers*/false, sparse, /*alloc*/true);
}


template<typename DataType>
shared_ptr<Tensor_<DataType>> SpinFreeMethod<DataType>::init_residual() const {
  unordered_set<size_t> sparse;
  auto put = [&sparse](const Index& i0, const Index& i1, const Index& i2, const Index& i3) {
    sparse.insert(generate_hash_key(i2, i3, i0, i1));
  };
  loop_over(put);
  return make_shared<Tensor_<DataType>>(v2_->indexrange(), /*kramers*/false, sparse, /*alloc*/true);
}


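// dot products between residual-like and amplitude-like tensors; the residual is
// stored with the bra and ket index pairs interchanged (see init_residual), hence
// the index transposition before contracting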
template<typename DataType>
DataType SpinFreeMethod<DataType>::dot_product_transpose(shared_ptr<const MultiTensor_<DataType>> r, shared_ptr<const MultiTensor_<DataType>> t2) const {
  assert(r->nref() == t2->nref());
  DataType out = 0.0;
  for (int i = 0; i != r->nref(); ++i) {
    out += detail::conj(r->fac(i)) * t2->fac(i);
    if (r->at(i) && t2->at(i))
      out += dot_product_transpose(r->at(i), t2->at(i));
  }
  return out;
}


template<typename DataType>
DataType SpinFreeMethod<DataType>::dot_product_transpose(shared_ptr<const Tensor_<DataType>> r, shared_ptr<const Tensor_<DataType>> t2) const {
  DataType out = 0.0;
  auto prod = [this, &r, &t2, &out](const Index& i0, const Index& i1, const Index& i2, const Index& i3) {
    const size_t size = r->get_size(i2, i3, i0, i1);
    if (r->is_local(i2, i3, i0, i1) && size != 0) {
      unique_ptr<DataType[]> tmp0 = t2->get_block(i0, i1, i2, i3);
      unique_ptr<DataType[]> tmp1(new DataType[size]);
      sort_indices<2,3,0,1,0,1,1,1>(tmp0.get(), tmp1.get(), i0.size(), i1.size(), i2.size(), i3.size());

      out += blas::dot_product(r->get_block(i2, i3, i0, i1).get(), size, tmp1.get());
    }
  };
  loop_over(prod);
  mpi__->allreduce(&out, 1);
  return out;
}


#define SPINFREEMETHOD_DETAIL
#include <src/smith/spinfreebase_update.cpp>
#undef SPINFREEMETHOD_DETAIL

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// explicit instantiation at the end of the file
template class bagel::SMITH::SpinFreeMethod<double>;
template class bagel::SMITH::SpinFreeMethod<complex<double>>;
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#endif