1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: fci_rdmderiv.cc
4 // Copyright (C) 2011 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 //
24 
25 
26 #include <src/ci/fci/fci.h>
27 #include <src/wfn/rdm.h>
28 #include <src/util/math/algo.h>
29 #include <src/util/parallel/rmawindow.h>
30 #include <src/util/taskqueue.h>
31 
32 using namespace std;
33 using namespace bagel;
34 
35 
rdm1deriv(const int target) const36 shared_ptr<Dvec> FCI::rdm1deriv(const int target) const {
37 
38   auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
39   cc_->set_det(detex);
40   shared_ptr<Civec> cbra = cc_->data(target);
41 
42   // 1RDM ci derivative
43   // <I|E_ij|0>
44 
45   auto dbra = make_shared<Dvec>(cbra->det(), norb_ * norb_);
46   dbra->zero();
47   sigma_2a1(cbra, dbra);
48   sigma_2a2(cbra, dbra);
49 
50   return dbra;
51 }
52 
53 
rdm2deriv(const int target) const54 shared_ptr<Dvec> FCI::rdm2deriv(const int target) const {
55   auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
56   cc_->set_det(detex);
57   shared_ptr<Civec> cbra = cc_->data(target);
58 
59   // make  <I|E_ij|0>
60   auto dbra = make_shared<Dvec>(cbra->det(), norb_ * norb_);
61   dbra->zero();
62   sigma_2a1(cbra, dbra);
63   sigma_2a2(cbra, dbra);
64 
65   // second make <J|E_kl|I><I|E_ij|0> - delta_li <J|E_kj|0>
66   auto ebra = make_shared<Dvec>(cbra->det(), norb_ * norb_ * norb_ * norb_);
67   auto tmp = make_shared<Dvec>(cbra->det(), norb_ * norb_);
68   int ijkl = 0;
69   int ij = 0;
70   for (auto iter = dbra->dvec().begin(); iter != dbra->dvec().end(); ++iter, ++ij) {
71     const int j = ij / norb_;
72     const int i = ij - j*norb_;
73     tmp->zero();
74     sigma_2a1(*iter, tmp);
75     sigma_2a2(*iter, tmp);
76     int kl = 0;
77     for (auto t = tmp->dvec().begin(); t != tmp->dvec().end(); ++t, ++ijkl, ++kl) {
78       *ebra->data(ijkl) = **t;
79       const int l = kl / norb_;
80       const int k = kl - l * norb_;
81       if (l == i) *ebra->data(ijkl) -= *dbra->data(k + norb_ * j);
82     }
83   }
84   return ebra;
85 }
86 
87 
88 namespace bagel {
89   class RDM2derivTask {
90     protected:
91       const int ij_;
92       const int kl_;
93       shared_ptr<Matrix> emat_;
94       shared_ptr<const Matrix> dmat_;
95       shared_ptr<const Dvec> cc_;
96       const int norb_;
97       const int size_;
98       const int offset_;
99     public:
RDM2derivTask(const int ij,const int kl,shared_ptr<Matrix> e,shared_ptr<const Matrix> d,shared_ptr<const Dvec> cc,const int norb,const int size,const int offset)100       RDM2derivTask(const int ij, const int kl, shared_ptr<Matrix> e, shared_ptr<const Matrix> d, shared_ptr<const Dvec> cc, const int norb, const int size, const int offset)
101         : ij_(ij), kl_(kl), emat_(e), dmat_(d), cc_(cc), norb_(norb), size_(size), offset_(offset) { }
102 
compute()103       void compute() {
104         const int lena = cc_->det()->lena();
105         const int lenb = cc_->det()->lenb();
106         const int norb2 = norb_ * norb_;
107         const int j = ij_ / norb_;
108         const int i = ij_ - j * norb_;
109         const int l = kl_ / norb_;
110         const int k = kl_ - l * norb_;
111         const int klij = kl_ + ij_ * norb2;
112 
113         for (auto& iter : cc_->det()->phia(k, l)) {
114           const size_t iaJ = iter.source;
115           const size_t iaI = iter.target;
116           const double sign = static_cast<double>(iter.sign);
117           for (size_t ib = 0; ib != lenb; ++ib) {
118             const size_t iJ = ib + iaJ * lenb;
119             const size_t iI = ib + iaI * lenb;
120             if ((iJ - offset_) < size_ && iJ >= offset_)
121               emat_->element(iJ-offset_, klij) += sign * dmat_->element(iI, ij_);
122           }
123         }
124 
125         for (size_t ia = 0; ia != lena; ++ia) {
126           for (auto& iter : cc_->det()->phib(k, l)) {
127             const size_t ibJ = iter.source;
128             const size_t ibI = iter.target;
129             const double sign = static_cast<double>(iter.sign);
130             const size_t iJ = ibJ + ia * lenb;
131             const size_t iI = ibI + ia * lenb;
132             if ((iJ - offset_) < size_ && iJ >= offset_)
133               emat_->element(iJ-offset_, klij) += sign * dmat_->element(iI, ij_);
134           }
135         }
136 
137         if (i == l) {
138           const int kj = k + j * norb_;
139           blas::ax_plus_y_n(-1.0, dmat_->element_ptr(offset_, kj), size_, emat_->element_ptr(0, klij));
140         }
141       }
142   };
143 }
144 
145 
rdm2deriv_offset(const int target,const size_t offset,const size_t size,shared_ptr<const Matrix> dmat,const bool parallel) const146 shared_ptr<Matrix> FCI::rdm2deriv_offset(const int target, const size_t offset, const size_t size, shared_ptr<const Matrix> dmat, const bool parallel) const {
147   const size_t norb2 = norb_ * norb_;
148   auto emat = make_shared<Matrix>(size, norb2*norb2, /*local=*/!parallel);
149 
150   TaskQueue<RDM2derivTask> task(norb2*(norb2+1)/2);
151   for (int ij = 0; ij != norb2; ++ij) {
152     for (int kl = ij; kl != norb2; ++kl) {
153       if (((kl-ij) % mpi__->size() == mpi__->rank()) || !parallel)
154         task.emplace_back(ij, kl, emat, dmat, cc_, norb_, size, offset);
155     }
156   }
157   task.compute();
158 
159   if (parallel)
160     emat->allreduce();
161 
162   for (size_t ij = 0; ij != norb2; ++ij)
163     for (size_t kl = 0; kl != ij; ++kl) {
164       const size_t klij = kl + ij * norb2;
165       const size_t ijkl = ij + kl * norb2;
166       for (size_t iJ = 0; iJ != size; ++iJ)
167         emat->element(iJ, klij) = emat->element(iJ, ijkl);
168     }
169 
170   return emat;
171 }
172 
173 
174 namespace bagel {
175   class RDM3derivTask {
176     protected:
177       const int ij_;
178       const int kl_;
179       shared_ptr<Matrix> fock_fbra_;
180       shared_ptr<const Matrix> fock_ebra_mat_;
181       shared_ptr<const Matrix> ebra_;
182       shared_ptr<const Matrix> fock_;
183       shared_ptr<const Dvec> cc_;
184       const int norb_;
185       const int size_;
186       const int offset_;
187     public:
RDM3derivTask(const int ij,const int kl,shared_ptr<Matrix> f,shared_ptr<const Matrix> fe,shared_ptr<const Matrix> e,shared_ptr<const Matrix> fock,shared_ptr<const Dvec> cc,const int norb,const int size,const int offset)188       RDM3derivTask(const int ij, const int kl, shared_ptr<Matrix> f, shared_ptr<const Matrix> fe, shared_ptr<const Matrix> e, shared_ptr<const Matrix> fock, shared_ptr<const Dvec> cc, const int norb, const int size, const int offset)
189         : ij_(ij), kl_(kl), fock_fbra_(f), fock_ebra_mat_(fe), ebra_(e), fock_(fock), cc_(cc), norb_(norb), size_(size), offset_(offset) { }
190 
compute()191       void compute() {
192         const size_t norb2 = norb_ * norb_;
193         const size_t norb3 = norb2 * norb_;
194         const int lena = cc_->det()->lena();
195         const int lenb = cc_->det()->lenb();
196         const int j = ij_ / norb_;
197         const int i = ij_ - j * norb_;
198         const int l = kl_ / norb_;
199         const int k = kl_ - l * norb_;
200         const int klij = kl_ + ij_ * norb2;
201 
202         for (auto& iter : cc_->det()->phia(k, l)) {
203           const size_t iaJ = iter.source;
204           const size_t iaI = iter.target;
205           const double sign = static_cast<double>(iter.sign);
206 
207           for (size_t ib = 0; ib != lenb; ++ib) {
208             const size_t iI = ib + iaI * lenb;
209             const size_t iJ = ib + iaJ * lenb;
210             if ((iJ - offset_) < size_ && iJ >= offset_)
211               fock_fbra_->element(iJ-offset_, klij) += sign * fock_ebra_mat_->element(iI, ij_);
212           }
213         }
214 
215         for (size_t ia = 0; ia != lena; ++ia) {
216           for (auto& iter : cc_->det()->phib(k, l)) {
217             const size_t ibJ = iter.source;
218             const size_t ibI = iter.target;
219             const double sign = static_cast<double>(iter.sign);
220             const size_t iI = ibI + ia * lenb;
221             const size_t iJ = ibJ + ia * lenb;
222             if ((iJ - offset_) < size_ && iJ >= offset_)
223               fock_fbra_->element(iJ-offset_, klij) += sign * fock_ebra_mat_->element(iI, ij_);
224           }
225         }
226 
227         if (i == l) {
228           const int kj = k + j * norb_;
229           blas::ax_plus_y_n(-1.0, fock_ebra_mat_->element_ptr(offset_, kj), size_, fock_fbra_->element_ptr(0, klij));
230         }
231 
232         for (int n = 0; n != norb_; ++n) {
233           const size_t ijkn = j * norb3 + i * norb2 + n * norb_ + k;
234           for (size_t iJ = 0; iJ != size_; ++iJ)
235             fock_fbra_->element(iJ, klij) -= ebra_->element(iJ, ijkn) * fock_->element(l, n);
236         }
237       }
238   };
239 }
240 
241 
rdm2fderiv(const int target,shared_ptr<const Matrix> fock,shared_ptr<const Matrix> dmat) const242 shared_ptr<Matrix> FCI::rdm2fderiv(const int target, shared_ptr<const Matrix> fock, shared_ptr<const Matrix> dmat) const {
243   auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
244   cc_->set_det(detex);
245   shared_ptr<Civec> cbra = cc_->data(target);
246 
247   const size_t norb2 = norb_ * norb_;
248   const size_t norb4 = norb2 * norb2;
249 
250   // Fock-weighted 2RDM derivative construction is multipassed and parallelized:
251   //  (1) When ndet > 10000, (ndet < 10000 -> so fast, almost no gain)
252   //  and (2) When we have processes more than one
253   //  OR  (3) When the number of words in <I|E_ij,kl|0> is larger than (10,10) case (635,040,000)
254   const size_t ndet = cbra->det()->size();
255   const size_t ijmax = 635040001;
256   const size_t ijnum = ndet * norb4;
257   const size_t npass = ((mpi__->size() * 2 > ((ijnum - 1)/ijmax + 1)) && (mpi__->size() != 1) && ndet > 10000) ? mpi__->size() * 2 : (ijnum - 1) / ijmax + 1;
258   const size_t nsize = (ndet - 1) / npass + 1;
259 
260   // form [J|k+l|0] = <J|m+k+ln|0> f_mn (multipassing with <J| )
261   auto fock_emat = make_shared<Matrix>(ndet, norb2);
262   for (size_t ipass = 0; ipass != npass; ++ipass) {
263     if (ipass % mpi__->size() != mpi__->rank()) continue;
264 
265     const size_t ioffset = ipass * nsize;
266     const size_t isize = (ipass != (npass - 1)) ? nsize : ndet - ioffset;
267     shared_ptr<Matrix> ebra;
268     ebra = rdm2deriv_offset(target, ioffset, isize, dmat, /*parallel=*/false);
269     for (size_t kl = 0; kl != norb2; ++kl) {
270       for (size_t mn = 0; mn != norb2; ++mn) {
271         const size_t n = mn / norb_;
272         const size_t m = mn - n * norb_;
273         const size_t klmn = kl * norb2 + mn;
274         for (size_t iI = 0; iI != isize; ++iI)
275           fock_emat->element(iI + ioffset, kl) += fock->element(m, n) * ebra->element(iI, klmn);
276       }
277     }
278   }
279   fock_emat->allreduce();
280 
281   return fock_emat;
282 }
283 
284 
285 tuple<shared_ptr<Matrix>,shared_ptr<Matrix>>
rdm3deriv(const int target,shared_ptr<const Matrix> fock,const size_t offset,const size_t size,shared_ptr<const Matrix> dmat,shared_ptr<const Matrix> fock_emat) const286   FCI::rdm3deriv(const int target, shared_ptr<const Matrix> fock, const size_t offset, const size_t size, shared_ptr<const Matrix> dmat, shared_ptr<const Matrix> fock_emat) const {
287 #ifndef HAVE_MPI_H
288   throw logic_error("FCI::rdm3deriv should not be called without MPI");
289 #endif
290   auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
291   cc_->set_det(detex);
292   shared_ptr<Civec> cbra = cc_->data(target);
293 
294   const size_t norb2 = norb_ * norb_;
295   const size_t norb4 = norb2 * norb2;
296 
297   // then form [L|k+i+jl|0] <- <L|i+j|K>[K|k+l|0] + ...
298   auto fock_fbra = make_shared<Matrix>(size, norb4, /*local=*/false);
299   // Set ebra within the pass. This is 2RDM derivative
300   auto ebra = rdm2deriv_offset(target, offset, size, dmat);
301 
302   // RDM3deriv contruction is task-base threaded
303   TaskQueue<RDM3derivTask> task(norb2 * (norb2 + 1) / 2);
304   for (int ij = 0; ij != norb2; ++ij) {
305     for (int kl = ij; kl != norb2; ++kl) {
306       if ((kl-ij) % mpi__->size() == mpi__->rank())
307         task.emplace_back(ij, kl, fock_fbra, fock_emat, ebra, fock, cc_, norb_, size, offset);
308     }
309   }
310   task.compute();
311   fock_fbra->allreduce();
312 
313   for (size_t ij = 0; ij != norb2; ++ij)
314     for (size_t kl = 0; kl != ij; ++kl) {
315       const size_t klij = kl + ij * norb2;
316       const size_t ijkl = ij + kl * norb2;
317       for (size_t iJ = 0; iJ != size; ++iJ)
318         fock_fbra->element(iJ, klij) = fock_fbra->element(iJ, ijkl);
319     }
320 
321   return make_tuple(ebra, fock_fbra);
322 }
323