1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: fci_rdmderiv.cc
4 // Copyright (C) 2011 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 //
24
25
26 #include <src/ci/fci/fci.h>
27 #include <src/wfn/rdm.h>
28 #include <src/util/math/algo.h>
29 #include <src/util/parallel/rmawindow.h>
30 #include <src/util/taskqueue.h>
31
32 using namespace std;
33 using namespace bagel;
34
35
rdm1deriv(const int target) const36 shared_ptr<Dvec> FCI::rdm1deriv(const int target) const {
37
38 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
39 cc_->set_det(detex);
40 shared_ptr<Civec> cbra = cc_->data(target);
41
42 // 1RDM ci derivative
43 // <I|E_ij|0>
44
45 auto dbra = make_shared<Dvec>(cbra->det(), norb_ * norb_);
46 dbra->zero();
47 sigma_2a1(cbra, dbra);
48 sigma_2a2(cbra, dbra);
49
50 return dbra;
51 }
52
53
rdm2deriv(const int target) const54 shared_ptr<Dvec> FCI::rdm2deriv(const int target) const {
55 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
56 cc_->set_det(detex);
57 shared_ptr<Civec> cbra = cc_->data(target);
58
59 // make <I|E_ij|0>
60 auto dbra = make_shared<Dvec>(cbra->det(), norb_ * norb_);
61 dbra->zero();
62 sigma_2a1(cbra, dbra);
63 sigma_2a2(cbra, dbra);
64
65 // second make <J|E_kl|I><I|E_ij|0> - delta_li <J|E_kj|0>
66 auto ebra = make_shared<Dvec>(cbra->det(), norb_ * norb_ * norb_ * norb_);
67 auto tmp = make_shared<Dvec>(cbra->det(), norb_ * norb_);
68 int ijkl = 0;
69 int ij = 0;
70 for (auto iter = dbra->dvec().begin(); iter != dbra->dvec().end(); ++iter, ++ij) {
71 const int j = ij / norb_;
72 const int i = ij - j*norb_;
73 tmp->zero();
74 sigma_2a1(*iter, tmp);
75 sigma_2a2(*iter, tmp);
76 int kl = 0;
77 for (auto t = tmp->dvec().begin(); t != tmp->dvec().end(); ++t, ++ijkl, ++kl) {
78 *ebra->data(ijkl) = **t;
79 const int l = kl / norb_;
80 const int k = kl - l * norb_;
81 if (l == i) *ebra->data(ijkl) -= *dbra->data(k + norb_ * j);
82 }
83 }
84 return ebra;
85 }
86
87
88 namespace bagel {
89 class RDM2derivTask {
90 protected:
91 const int ij_;
92 const int kl_;
93 shared_ptr<Matrix> emat_;
94 shared_ptr<const Matrix> dmat_;
95 shared_ptr<const Dvec> cc_;
96 const int norb_;
97 const int size_;
98 const int offset_;
99 public:
RDM2derivTask(const int ij,const int kl,shared_ptr<Matrix> e,shared_ptr<const Matrix> d,shared_ptr<const Dvec> cc,const int norb,const int size,const int offset)100 RDM2derivTask(const int ij, const int kl, shared_ptr<Matrix> e, shared_ptr<const Matrix> d, shared_ptr<const Dvec> cc, const int norb, const int size, const int offset)
101 : ij_(ij), kl_(kl), emat_(e), dmat_(d), cc_(cc), norb_(norb), size_(size), offset_(offset) { }
102
compute()103 void compute() {
104 const int lena = cc_->det()->lena();
105 const int lenb = cc_->det()->lenb();
106 const int norb2 = norb_ * norb_;
107 const int j = ij_ / norb_;
108 const int i = ij_ - j * norb_;
109 const int l = kl_ / norb_;
110 const int k = kl_ - l * norb_;
111 const int klij = kl_ + ij_ * norb2;
112
113 for (auto& iter : cc_->det()->phia(k, l)) {
114 const size_t iaJ = iter.source;
115 const size_t iaI = iter.target;
116 const double sign = static_cast<double>(iter.sign);
117 for (size_t ib = 0; ib != lenb; ++ib) {
118 const size_t iJ = ib + iaJ * lenb;
119 const size_t iI = ib + iaI * lenb;
120 if ((iJ - offset_) < size_ && iJ >= offset_)
121 emat_->element(iJ-offset_, klij) += sign * dmat_->element(iI, ij_);
122 }
123 }
124
125 for (size_t ia = 0; ia != lena; ++ia) {
126 for (auto& iter : cc_->det()->phib(k, l)) {
127 const size_t ibJ = iter.source;
128 const size_t ibI = iter.target;
129 const double sign = static_cast<double>(iter.sign);
130 const size_t iJ = ibJ + ia * lenb;
131 const size_t iI = ibI + ia * lenb;
132 if ((iJ - offset_) < size_ && iJ >= offset_)
133 emat_->element(iJ-offset_, klij) += sign * dmat_->element(iI, ij_);
134 }
135 }
136
137 if (i == l) {
138 const int kj = k + j * norb_;
139 blas::ax_plus_y_n(-1.0, dmat_->element_ptr(offset_, kj), size_, emat_->element_ptr(0, klij));
140 }
141 }
142 };
143 }
144
145
rdm2deriv_offset(const int target,const size_t offset,const size_t size,shared_ptr<const Matrix> dmat,const bool parallel) const146 shared_ptr<Matrix> FCI::rdm2deriv_offset(const int target, const size_t offset, const size_t size, shared_ptr<const Matrix> dmat, const bool parallel) const {
147 const size_t norb2 = norb_ * norb_;
148 auto emat = make_shared<Matrix>(size, norb2*norb2, /*local=*/!parallel);
149
150 TaskQueue<RDM2derivTask> task(norb2*(norb2+1)/2);
151 for (int ij = 0; ij != norb2; ++ij) {
152 for (int kl = ij; kl != norb2; ++kl) {
153 if (((kl-ij) % mpi__->size() == mpi__->rank()) || !parallel)
154 task.emplace_back(ij, kl, emat, dmat, cc_, norb_, size, offset);
155 }
156 }
157 task.compute();
158
159 if (parallel)
160 emat->allreduce();
161
162 for (size_t ij = 0; ij != norb2; ++ij)
163 for (size_t kl = 0; kl != ij; ++kl) {
164 const size_t klij = kl + ij * norb2;
165 const size_t ijkl = ij + kl * norb2;
166 for (size_t iJ = 0; iJ != size; ++iJ)
167 emat->element(iJ, klij) = emat->element(iJ, ijkl);
168 }
169
170 return emat;
171 }
172
173
174 namespace bagel {
175 class RDM3derivTask {
176 protected:
177 const int ij_;
178 const int kl_;
179 shared_ptr<Matrix> fock_fbra_;
180 shared_ptr<const Matrix> fock_ebra_mat_;
181 shared_ptr<const Matrix> ebra_;
182 shared_ptr<const Matrix> fock_;
183 shared_ptr<const Dvec> cc_;
184 const int norb_;
185 const int size_;
186 const int offset_;
187 public:
RDM3derivTask(const int ij,const int kl,shared_ptr<Matrix> f,shared_ptr<const Matrix> fe,shared_ptr<const Matrix> e,shared_ptr<const Matrix> fock,shared_ptr<const Dvec> cc,const int norb,const int size,const int offset)188 RDM3derivTask(const int ij, const int kl, shared_ptr<Matrix> f, shared_ptr<const Matrix> fe, shared_ptr<const Matrix> e, shared_ptr<const Matrix> fock, shared_ptr<const Dvec> cc, const int norb, const int size, const int offset)
189 : ij_(ij), kl_(kl), fock_fbra_(f), fock_ebra_mat_(fe), ebra_(e), fock_(fock), cc_(cc), norb_(norb), size_(size), offset_(offset) { }
190
compute()191 void compute() {
192 const size_t norb2 = norb_ * norb_;
193 const size_t norb3 = norb2 * norb_;
194 const int lena = cc_->det()->lena();
195 const int lenb = cc_->det()->lenb();
196 const int j = ij_ / norb_;
197 const int i = ij_ - j * norb_;
198 const int l = kl_ / norb_;
199 const int k = kl_ - l * norb_;
200 const int klij = kl_ + ij_ * norb2;
201
202 for (auto& iter : cc_->det()->phia(k, l)) {
203 const size_t iaJ = iter.source;
204 const size_t iaI = iter.target;
205 const double sign = static_cast<double>(iter.sign);
206
207 for (size_t ib = 0; ib != lenb; ++ib) {
208 const size_t iI = ib + iaI * lenb;
209 const size_t iJ = ib + iaJ * lenb;
210 if ((iJ - offset_) < size_ && iJ >= offset_)
211 fock_fbra_->element(iJ-offset_, klij) += sign * fock_ebra_mat_->element(iI, ij_);
212 }
213 }
214
215 for (size_t ia = 0; ia != lena; ++ia) {
216 for (auto& iter : cc_->det()->phib(k, l)) {
217 const size_t ibJ = iter.source;
218 const size_t ibI = iter.target;
219 const double sign = static_cast<double>(iter.sign);
220 const size_t iI = ibI + ia * lenb;
221 const size_t iJ = ibJ + ia * lenb;
222 if ((iJ - offset_) < size_ && iJ >= offset_)
223 fock_fbra_->element(iJ-offset_, klij) += sign * fock_ebra_mat_->element(iI, ij_);
224 }
225 }
226
227 if (i == l) {
228 const int kj = k + j * norb_;
229 blas::ax_plus_y_n(-1.0, fock_ebra_mat_->element_ptr(offset_, kj), size_, fock_fbra_->element_ptr(0, klij));
230 }
231
232 for (int n = 0; n != norb_; ++n) {
233 const size_t ijkn = j * norb3 + i * norb2 + n * norb_ + k;
234 for (size_t iJ = 0; iJ != size_; ++iJ)
235 fock_fbra_->element(iJ, klij) -= ebra_->element(iJ, ijkn) * fock_->element(l, n);
236 }
237 }
238 };
239 }
240
241
rdm2fderiv(const int target,shared_ptr<const Matrix> fock,shared_ptr<const Matrix> dmat) const242 shared_ptr<Matrix> FCI::rdm2fderiv(const int target, shared_ptr<const Matrix> fock, shared_ptr<const Matrix> dmat) const {
243 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
244 cc_->set_det(detex);
245 shared_ptr<Civec> cbra = cc_->data(target);
246
247 const size_t norb2 = norb_ * norb_;
248 const size_t norb4 = norb2 * norb2;
249
250 // Fock-weighted 2RDM derivative construction is multipassed and parallelized:
251 // (1) When ndet > 10000, (ndet < 10000 -> so fast, almost no gain)
252 // and (2) When we have processes more than one
253 // OR (3) When the number of words in <I|E_ij,kl|0> is larger than (10,10) case (635,040,000)
254 const size_t ndet = cbra->det()->size();
255 const size_t ijmax = 635040001;
256 const size_t ijnum = ndet * norb4;
257 const size_t npass = ((mpi__->size() * 2 > ((ijnum - 1)/ijmax + 1)) && (mpi__->size() != 1) && ndet > 10000) ? mpi__->size() * 2 : (ijnum - 1) / ijmax + 1;
258 const size_t nsize = (ndet - 1) / npass + 1;
259
260 // form [J|k+l|0] = <J|m+k+ln|0> f_mn (multipassing with <J| )
261 auto fock_emat = make_shared<Matrix>(ndet, norb2);
262 for (size_t ipass = 0; ipass != npass; ++ipass) {
263 if (ipass % mpi__->size() != mpi__->rank()) continue;
264
265 const size_t ioffset = ipass * nsize;
266 const size_t isize = (ipass != (npass - 1)) ? nsize : ndet - ioffset;
267 shared_ptr<Matrix> ebra;
268 ebra = rdm2deriv_offset(target, ioffset, isize, dmat, /*parallel=*/false);
269 for (size_t kl = 0; kl != norb2; ++kl) {
270 for (size_t mn = 0; mn != norb2; ++mn) {
271 const size_t n = mn / norb_;
272 const size_t m = mn - n * norb_;
273 const size_t klmn = kl * norb2 + mn;
274 for (size_t iI = 0; iI != isize; ++iI)
275 fock_emat->element(iI + ioffset, kl) += fock->element(m, n) * ebra->element(iI, klmn);
276 }
277 }
278 }
279 fock_emat->allreduce();
280
281 return fock_emat;
282 }
283
284
285 tuple<shared_ptr<Matrix>,shared_ptr<Matrix>>
rdm3deriv(const int target,shared_ptr<const Matrix> fock,const size_t offset,const size_t size,shared_ptr<const Matrix> dmat,shared_ptr<const Matrix> fock_emat) const286 FCI::rdm3deriv(const int target, shared_ptr<const Matrix> fock, const size_t offset, const size_t size, shared_ptr<const Matrix> dmat, shared_ptr<const Matrix> fock_emat) const {
287 #ifndef HAVE_MPI_H
288 throw logic_error("FCI::rdm3deriv should not be called without MPI");
289 #endif
290 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
291 cc_->set_det(detex);
292 shared_ptr<Civec> cbra = cc_->data(target);
293
294 const size_t norb2 = norb_ * norb_;
295 const size_t norb4 = norb2 * norb2;
296
297 // then form [L|k+i+jl|0] <- <L|i+j|K>[K|k+l|0] + ...
298 auto fock_fbra = make_shared<Matrix>(size, norb4, /*local=*/false);
299 // Set ebra within the pass. This is 2RDM derivative
300 auto ebra = rdm2deriv_offset(target, offset, size, dmat);
301
302 // RDM3deriv contruction is task-base threaded
303 TaskQueue<RDM3derivTask> task(norb2 * (norb2 + 1) / 2);
304 for (int ij = 0; ij != norb2; ++ij) {
305 for (int kl = ij; kl != norb2; ++kl) {
306 if ((kl-ij) % mpi__->size() == mpi__->rank())
307 task.emplace_back(ij, kl, fock_fbra, fock_emat, ebra, fock, cc_, norb_, size, offset);
308 }
309 }
310 task.compute();
311 fock_fbra->allreduce();
312
313 for (size_t ij = 0; ij != norb2; ++ij)
314 for (size_t kl = 0; kl != ij; ++kl) {
315 const size_t klij = kl + ij * norb2;
316 const size_t ijkl = ij + kl * norb2;
317 for (size_t iJ = 0; iJ != size; ++iJ)
318 fock_fbra->element(iJ, klij) = fock_fbra->element(iJ, ijkl);
319 }
320
321 return make_tuple(ebra, fock_fbra);
322 }
323