1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: mp2grad.cc
4 // Copyright (C) 2012 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 //
24 
25 #include <src/pt2/mp2/mp2grad.h>
26 #include <src/pt2/mp2/mp2cache.h>
27 #include <src/pt2/mp2/mp2accum.h>
28 #include <src/grad/cphf.h>
29 #include <iostream>
30 #include <iomanip>
31 #include <src/util/f77.h>
32 #include <src/util/prim_op.h>
33 #include <src/prop/multipole.h>
34 #include <src/grad/gradeval.h>
35 #include <src/prop/moprint.h>
36 
37 using namespace std;
38 using namespace bagel;
39 using namespace btas;
40 
MP2Grad(shared_ptr<const PTree> input,shared_ptr<const Geometry> g,shared_ptr<const Reference> ref)41 MP2Grad::MP2Grad(shared_ptr<const PTree> input, shared_ptr<const Geometry> g, shared_ptr<const Reference> ref) : MP2(input, g, ref) {
42 
43 }
44 
45 
46 // does nothing.
compute()47 void MP2Grad::compute() { }
48 
49 
50 template<>
energyvec() const51 vector<double> GradEval<MP2Grad>::energyvec() const {
52   vector<double> out;
53   out.push_back(energy());
54   return out;
55 }
56 
57 
58 template<>
compute(const string jobtitle,shared_ptr<const GradInfo> gradinfo)59 shared_ptr<GradFile> GradEval<MP2Grad>::compute(const string jobtitle, shared_ptr<const GradInfo> gradinfo) {
60   Timer time;
61 
62   const size_t ncore = task_->ncore();
63   const size_t nmobasis = ref_->coeff()->mdim();
64 
65   // since this is only for closed shell
66   const size_t nocc = ref_->nocc() - ncore;
67   const size_t nocca = ref_->nocc();
68   if (nocc < 1) throw runtime_error("no correlated electrons");
69   const size_t nvirt = nmobasis - nocca;
70   if (nvirt < 1) throw runtime_error("no virtuals orbitals");
71 
72   const MatView ccmat = ref_->coeff()->slice(0, ncore);
73   const MatView ocmat = ref_->coeff()->slice(0, nocca);
74   const MatView vcmat = ref_->coeff()->slice(nocca, nmobasis);
75   const MatView acmat = ref_->coeff()->slice(ncore, nocca);
76 
77   // first compute half transformed integrals
78   shared_ptr<DFHalfDist> half;
79   shared_ptr<const Geometry> cgeom;
80   if (task_->abasis().empty()) {
81     cgeom = geom_;
82     half = geom_->df()->compute_half_transform(acmat);
83   } else {
84     auto info = make_shared<PTree>(); info->put("df_basis", task_->abasis());
85     cgeom = make_shared<Geometry>(*geom_, info, false);
86     half = cgeom->df()->compute_half_transform(acmat);
87   }
88   shared_ptr<const DFHalfDist> halfjj = geom_->df()->compute_half_transform(ocmat)->apply_JJ();
89   // second transform for virtual index
90   // this is now (naux, nocc, nvirt)
91   shared_ptr<const DFFullDist> full = half->compute_second_transform(vcmat)->apply_J();
92   shared_ptr<const DFFullDist> bv = full->apply_J();
93   shared_ptr<DFFullDist> gia = bv->clone();
94 
95   time.tick_print("3-index integral transform");
96 
97   // assemble
98   const VectorB eig_tm = ref_->eig();
99   const double* eig = eig_tm.data() + ncore;
100 
101   auto dmp2 = make_shared<Matrix>(nmobasis, nmobasis);
102   double ecorr = 0.0;
103   {
104     // second transform for virtual index and rearrange data
105     auto dist = make_shared<StaticDist>(full->nocc1()*full->nocc2(), mpi__->size(), full->nocc2());
106     auto fullt = make_shared<DFDistT>(full->swap(), dist);
107 
108     MP2Cache cache(fullt->naux(), nocc, nvirt, fullt);
109 
110     size_t memory_size = half->block(0)->size() * 2;
111     mpi__->broadcast(&memory_size, 1, 0);
112     const int nloop = cache.nloop();
113     const int ncache = min(memory_size/(nvirt*nvirt), size_t(20));
114     cout << "    * ncache = " << ncache << endl;
115     for (int n = 0; n != min(ncache, nloop); ++n)
116       cache.block(n, -1);
117 
118     for (int n = 0; n != nloop; ++n) {
119       // take care of data. The communication should be hidden
120       if (n+ncache < nloop)
121         cache.block(n+ncache, n-1);
122 
123       const int i = get<0>(cache.task(n));
124       const int j = get<1>(cache.task(n));
125       if (i < 0 || j < 0) continue;
126       cache.data_wait(n);
127 
128       shared_ptr<const Matrix> iblock = cache(i);
129       shared_ptr<const Matrix> jblock = cache(j);
130       const Matrix mat(*iblock % *jblock); // V
131       Matrix mat2 = mat; // 2T-T^t
132       if (i != j) {
133         mat2 *= 2.0;
134         mat2 -= *mat.transpose();
135       }
136       Matrix mat3 = mat; // T
137 
138       for (int a = 0; a != nvirt; ++a) {
139         for (int b = 0; b != nvirt; ++b) {
140           const double denom = -eig[a+nocc]+eig[i]-eig[b+nocc]+eig[j];
141           mat2(b,a) /= denom;
142           mat3(b,a) /= denom;
143         }
144       }
145 
146       const double fac = i == j ? 1.0 : 2.0;
147       ecorr += mat.dot_product(mat2) * fac;
148       dmp2->add_block(2.0, nocca, nocca, nvirt, nvirt, mat2 % mat3);
149       if (i != j)
150         dmp2->add_block(2.0, nocca, nocca, nvirt, nvirt, mat2 ^ mat3);
151     }
152     cache.wait();
153     // allreduce energy contributions
154     mpi__->allreduce(&ecorr, 1);
155   }
156 
157   time.tick_print("First pass based on occupied orbitals");
158 
159   // do the same with virtual-virtual index distribution to obtain D_ij
160   // TODO is there any better way??
161   {
162     // second transform for virtual index and rearrange data
163     auto dist = make_shared<StaticDist>(full->nocc1()*full->nocc2(), mpi__->size(), full->nocc1());
164     auto fullt = make_shared<DFDistT>(full, dist);
165     MP2Cache cache(fullt->naux(), nvirt, nocc, fullt);
166 
167     auto giat = fullt->clone();
168     MP2Accum accum(fullt->naux(), nvirt, nocc, giat, cache.tasks());
169 
170     size_t memory_size = half->block(0)->size() * 2;
171     mpi__->broadcast(&memory_size, 1, 0);
172     const int nloop = cache.nloop();
173     const int ncache = min(memory_size/(nocc*nocc), size_t(30));
174     cout << "    * ncache = " << ncache << endl;
175     for (int n = 0; n != min(ncache, nloop); ++n)
176       cache.block(n, -1);
177 
178     for (int n = 0; n != nloop; ++n) {
179       // take care of data. The communication should be hidden
180       if (n+ncache < nloop)
181         cache.block(n+ncache, n-1);
182 
183       const int i = get<0>(cache.task(n));
184       const int j = get<1>(cache.task(n));
185       if (i >= 0 && j >= 0) {
186         cache.data_wait(n);
187 
188         shared_ptr<const Matrix> iblock = cache(i);
189         shared_ptr<const Matrix> jblock = cache(j);
190         Matrix mat2 = *iblock % *jblock; // 2T-T^t
191         Matrix mat3 = mat2; // T
192         if (i != j) {
193           mat2 *= 2.0;
194           mat2 -= *mat3.transpose();
195         }
196 
197         for (int a = 0; a != nocc; ++a) {
198           for (int b = 0; b != nocc; ++b) {
199             const double denom = -eig[a]+eig[i+nocc]-eig[b]+eig[j+nocc];
200             mat2(b,a) /= denom;
201             mat3(b,a) /= denom;
202           }
203         }
204 
205         accum.accumulate<0>(n, make_shared<Matrix>(*jblock ^ mat2));
206         accum.accumulate<1>(n, make_shared<Matrix>(*iblock * mat2));
207 
208         dmp2->add_block(-2.0, ncore, ncore, nocc, nocc, mat2 % mat3);
209         if (i != j)
210           dmp2->add_block(-2.0, ncore, ncore, nocc, nocc, mat2 ^ mat3);
211       } else {
212         // for receiving data
213         accum.accumulate<0>(n, nullptr);
214         accum.accumulate<1>(n, nullptr);
215       }
216     }
217     accum.wait();
218     cache.wait();
219     giat = giat->apply_J();
220     giat->get_paralleldf(gia);
221   }
222 
223   dmp2->allreduce();
224   gia->scale(-1.0);
225 
226   time.tick_print("Second pass based on virtual orbitals");
227   cout << endl;
228   cout << "      MP2 correlation energy: " << fixed << setw(15) << setprecision(10) << ecorr << endl << endl;
229 
230   // L''aq = 2 Gia(D|ia) (D|iq)
231   shared_ptr<const Matrix> laq = gia->form_2index(half, 2.0);
232   const Matrix lai = *laq * ocmat;
233 
234   // Gip = Gia(D|ia) C+_ap
235   shared_ptr<DFHalfDist> gip = gia->back_transform(vcmat);
236   // Liq = 2 Gip(D) * (D|pq)
237   Matrix lia(nocca, nvirt);
238   Matrix lif(nocc, max(1lu,ncore));
239   shared_ptr<const Matrix> lip = gip->form_2index(cgeom->df(), 2.0);
240   {
241     lia.add_block(1.0, ncore, 0, nocc, nvirt, *lip * vcmat);
242     if (ncore)
243       lif = *lip * ccmat;
244   }
245 
246   // core-occ density matrix elements
247   for (int i = 0; i != ncore; ++i)
248     for (int j = ncore; j != nocca; ++j)
249       dmp2->element(j,i) = dmp2->element(i,j) = lif(j-ncore, i) / (eig_tm(j)-eig_tm(i));
250 
251   // 2*J_al(d_rs)
252   auto dmp2ao_part = make_shared<const Matrix>(*ref_->coeff() * *dmp2 ^ *ref_->coeff());
253   const Matrix jai = vcmat % *geom_->df()->compute_Jop(dmp2ao_part) * ocmat * 2.0;
254   // -1*K_al(d_rs)
255   const Matrix kia = *halfjj->compute_Kop_1occ(dmp2ao_part, -1.0) * vcmat;
256 
257   auto grad = make_shared<Matrix>(nmobasis, nmobasis);
258   for (int i = 0; i != nocca; ++i)
259     for (int a = 0; a != nvirt; ++a)
260       // minus sign is due to the convention in the solvers which solve Ax+B=0..
261       grad->element(a+nocca, i) = - (lai(a,i) - lia(i,a) - jai(a,i) - kia(i,a));
262 
263   {
264     auto d_unrelaxed = make_shared<Matrix>(*dmp2);
265     for (int i = 0; i != nocca; ++i) d_unrelaxed->element(i,i) += 2.0;
266     auto dao_unrelaxed = make_shared<Matrix>(*ref_->coeff() * *d_unrelaxed ^ *ref_->coeff());
267     Dipole dipole(geom_, dao_unrelaxed, "MP2 unrelaxed");
268     dipole.compute();
269   }
270   time.tick_print("Right hand side of CPHF");
271   cout << endl;
272 
273   // solving CPHF (or Z-vector equation)
274   auto cphf = make_shared<CPHF>(grad, ref_->eig(), halfjj, ref_);
275   shared_ptr<Matrix> dia = cphf->solve(task_->scf()->thresh_scf(), gradinfo->maxziter());
276   *dmp2 += *dia;
277 
278   // total density matrix
279   auto dtot = make_shared<Matrix>(*dmp2);
280   for (int i = 0; i != nocca; ++i) dtot->element(i,i) += 2.0;
281 
282   // computes dipole mements
283   auto dtotao = make_shared<Matrix>(*ref_->coeff() * *dtot ^ *ref_->coeff());
284   {
285     Dipole dipole(geom_, dtotao, "MP2 relaxed");
286     dipole_ = dipole.compute();
287   }
288 
289   // print relaxed density if requested
290   if (gradinfo->density_print()) {
291     auto density_print = make_shared<MOPrint>(gradinfo->moprint_info(), geom_, ref_, /*is_density=*/true, make_shared<const ZMatrix>(*dtotao, 1.0));
292     density_print->compute();
293   }
294 
295   ////////////////////////////////////////////////////////////////////////////
296 
297   time.tick_print("CPHF solved");
298 
299   // one electron matrices
300   auto dmp2ao = make_shared<Matrix>(*ref_->coeff() * *dmp2 ^ *ref_->coeff());
301   auto d0ao = make_shared<Matrix>(*dtotao - *dmp2ao);
302   auto dbarao = make_shared<Matrix>(*dtotao - *d0ao*0.5);
303 
304   // size of naux
305   shared_ptr<const VectorB> cd0 = geom_->df()->compute_cd(d0ao);
306   shared_ptr<const VectorB> cdbar = geom_->df()->compute_cd(dbarao);
307 
308 
309   // three-index derivatives (seperable part)...
310   vector<shared_ptr<const VectorB>> cd {cd0, cdbar};
311   vector<shared_ptr<const Matrix>> dd {dbarao, d0ao};
312 
313   shared_ptr<DFHalfDist> sepd = halfjj->apply_density(dbarao);
314   shared_ptr<DFDist> sep3 = sepd->back_transform(ocmat);
315   sep3->scale(-2.0);
316   sep3->add_direct_product(cd, dd, 1.0);
317   /// mp2 two body part ----------------
318   shared_ptr<DFDist> sep32;
319   if (geom_ == cgeom) {
320     sep3->ax_plus_y(4.0, gip->back_transform(acmat));
321   } else {
322     sep32 = gip->back_transform(acmat);
323     sep32->scale(4.0);
324   }
325 
326   // two-index derivatives (seperable part)..
327   auto sep2 = make_shared<Matrix>((*cd0 ^ *cdbar) * 2.0);
328   *sep2 -= *halfjj->form_aux_2index(sepd, 2.0);
329   shared_ptr<const Matrix> sep22;
330   if (geom_ == cgeom) {
331     *sep2 += *gia->form_aux_2index_apply_J(full, 4.0);
332   } else {
333     sep22 = gia->form_aux_2index_apply_J(full, 4.0);
334   }
335 
336   // energy weighted density
337   auto wd = make_shared<Matrix>(nmobasis, nmobasis);
338   for (int i = 0; i != nocca; ++i)
339     for (int j = 0; j != nocca; ++j)
340       wd->element(j,i) += dtot->element(j,i) * eig_tm(j);
341   for (int i = 0; i != nvirt; ++i)
342     for (int j = 0; j != nvirt; ++j)
343       wd->element(j+nocca,i+nocca) += dmp2->element(j+nocca,i+nocca) * eig_tm(j+nocca);
344   for (int i = 0; i != nocca; ++i)
345     for (int j = 0; j != nvirt; ++j)
346       wd->element(j+nocca,i) += 2.0 * dmp2->element(j+nocca,i) * eig_tm(i);
347   // Liq + Laq
348   wd->add_block(1.0, ncore, 0, nocc, nocca, *lip * ocmat);
349   wd->add_block(1.0, nocca, 0, nvirt, nocca, *laq * ocmat * 2.0);
350   wd->add_block(1.0, nocca, nocca, nvirt, nvirt, *laq * vcmat);
351   wd->add_block(1.0, 0, 0, nocca, nocca, (ocmat % *geom_->df()->compute_Jop(dmp2ao) * ocmat * 2.0));
352   wd->add_block(1.0, 0, 0, nocca, nocca, (*halfjj->compute_Kop_1occ(dmp2ao, -1.0) * ocmat));
353 
354   wd->symmetrize();
355   auto wdao = make_shared<Matrix>(*ref_->coeff() * *wd ^ *ref_->coeff());
356 
357   time.tick_print("Density matrices computed");
358   cout << endl;
359 
360   // gradient evaluation
361   shared_ptr<GradFile> gradf = contract_gradient(dtotao, wdao, sep3, sep2, /*nacme = */nullptr, false, cgeom, sep32, sep22);
362 
363   time.tick_print("Gradient integrals contracted");
364   cout << endl;
365 
366   // set proper energy_
367   energy_ = ref_->energy(0) + ecorr;
368 
369   gradf->print();
370   return gradf;
371 
372 }
373 
374 
375