1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: fci_rdm.cc
4 // Copyright (C) 2011 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 //
24
25
26 #include <src/ci/fci/fci.h>
27 #include <src/util/prim_op.h>
28 #include <src/util/math/algo.h>
29 #include <src/wfn/rdm.h>
30
31 using namespace std;
32 using namespace bagel;
33
FCI_bare(shared_ptr<const CIWfn> ci)34 FCI_bare::FCI_bare(shared_ptr<const CIWfn> ci) {
35 print_thresh_ = 1.0e-8;
36 nelea_ = ci->det()->nelea();
37 neleb_ = ci->det()->neleb();
38 ncore_ = ci->ncore();
39 norb_ = ci->nact();
40 nstate_ = ci->nstates();
41 energy_ = ci->energies();
42 cc_ = ci->civectors() ? ci->civectors()->copy() : nullptr;
43 det_ = ci->det();
44 rdm1_ = make_shared<VecRDM<1>>();
45 rdm2_ = make_shared<VecRDM<2>>();
46 }
47
48
compute_rdm12()49 void FCI::compute_rdm12() {
50 // Needs initialization here because we use daxpy.
51 // For nstate_ == 1, rdm1_av_ = rdm1_->at(0).
52 if (rdm1_av_ == nullptr && nstate_ > 1) {
53 rdm1_av_ = make_shared<RDM<1>>(norb_);
54 rdm2_av_ = make_shared<RDM<2>>(norb_);
55 } else if (nstate_ > 1) {
56 rdm1_av_->zero();
57 rdm2_av_->zero();
58 }
59 // we need expanded lists
60 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, /*compressed=*/false, /*mute=*/true);
61 cc_->set_det(detex);
62
63 for (int i = 0; i != nstate_; ++i)
64 compute_rdm12(i, i);
65
66 // calculate state averaged RDMs
67 if (nstate_ != 1) {
68 for (int ist = 0; ist != nstate_; ++ist) {
69 rdm1_av_->ax_plus_y(weight_[ist], rdm1_->at(ist));
70 rdm2_av_->ax_plus_y(weight_[ist], rdm2_->at(ist));
71 }
72 } else {
73 rdm1_av_ = rdm1_->at(0,0);
74 rdm2_av_ = rdm2_->at(0,0);
75 }
76
77 cc_->set_det(det_);
78 }
79
80
compute_rdm12(const int ist,const int jst)81 void FCI::compute_rdm12(const int ist, const int jst) {
82 if (det_->compress()) {
83 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
84 cc_->set_det(detex);
85 }
86
87 shared_ptr<Civec> ccbra = cc_->data(ist);
88 shared_ptr<Civec> ccket = cc_->data(jst);
89
90 shared_ptr<RDM<1>> rdm1;
91 shared_ptr<RDM<2>> rdm2;
92 tie(rdm1, rdm2) = compute_rdm12_from_civec(ccbra, ccket);
93
94 // setting to private members.
95 rdm1_->emplace(ist, jst, rdm1);
96 rdm2_->emplace(ist, jst, rdm2);
97
98 cc_->set_det(det_);
99 }
100
101
102 tuple<shared_ptr<RDM<1>>, shared_ptr<RDM<2>>>
compute_rdm12_from_civec(shared_ptr<const Civec> cbra,shared_ptr<const Civec> cket) const103 FCI::compute_rdm12_from_civec(shared_ptr<const Civec> cbra, shared_ptr<const Civec> cket) const {
104
105 // since we consider here number conserving operators...
106 auto dbra = make_shared<Dvec>(cbra->det(), norb_*norb_);
107 sigma_2a1(cbra, dbra);
108 sigma_2a2(cbra, dbra);
109
110 shared_ptr<Dvec> dket;
111 // if bra and ket vectors are different, we need to form Sigma for ket as well.
112 if (cbra != cket) {
113 dket = make_shared<Dvec>(cket->det(), norb_*norb_);
114 sigma_2a1(cket, dket);
115 sigma_2a2(cket, dket);
116 } else {
117 dket = dbra;
118 }
119
120 return compute_rdm12_last_step(dbra, dket, cbra);
121 }
122
123
124 tuple<shared_ptr<RDM<1>>, shared_ptr<RDM<2>>>
compute_rdm12_last_step(shared_ptr<const Dvec> dbra,shared_ptr<const Dvec> dket,shared_ptr<const Civec> cibra) const125 FCI::compute_rdm12_last_step(shared_ptr<const Dvec> dbra, shared_ptr<const Dvec> dket, shared_ptr<const Civec> cibra) const {
126
127 const int nri = cibra->asize()*cibra->lenb();
128
129 // 1RDM c^dagger <I|\hat{E}|0>
130 // 2RDM \sum_I <0|\hat{E}|I> <I|\hat{E}|0>
131 auto rdm1 = make_shared<RDM<1>>(norb_);
132 auto rdm2 = make_shared<RDM<2>>(norb_);
133 {
134 auto cibra_data = make_shared<VectorB>(nri);
135 copy_n(cibra->data(), nri, cibra_data->data());
136
137 auto dketv = btas::group(*dket,0,2);
138 auto rdm1t = btas::group(*rdm1,0,2);
139 btas::contract(1.0, dketv, {0,1}, *cibra_data, {0}, 0.0, rdm1t, {1});
140
141 auto dbrav = btas::group(*dbra,0,2);
142 auto rdm2t = group(group(*rdm2, 2,4), 0,2);
143 btas::contract(1.0, dbrav, {1,0}, dketv, {1,2}, 0.0, rdm2t, {0,2});
144 }
145
146 // sorting... a bit stupid but cheap anyway
147 // This is since we transpose operator pairs in dgemm - cheaper to do so after dgemm (usually Nconfig >> norb_**2).
148 unique_ptr<double[]> buf(new double[norb_*norb_]);
149 for (int i = 0; i != norb_; ++i) {
150 for (int k = 0; k != norb_; ++k) {
151 copy_n(&rdm2->element(0,0,k,i), norb_*norb_, buf.get());
152 blas::transpose(buf.get(), norb_, norb_, rdm2->element_ptr(0,0,k,i));
153 }
154 }
155
156 // put in diagonal into 2RDM
157 // Gamma{i+ k+ l j} = Gamma{i+ j k+ l} - delta_jk Gamma{i+ l}
158 for (int i = 0; i != norb_; ++i)
159 for (int k = 0; k != norb_; ++k)
160 for (int j = 0; j != norb_; ++j)
161 rdm2->element(j,k,k,i) -= rdm1->element(j,i);
162
163 return tie(rdm1, rdm2);
164 }
165
166
167 tuple<shared_ptr<RDM<1>>, shared_ptr<RDM<2>>>
compute_rdm12_av_from_dvec(shared_ptr<const Dvec> dbra,shared_ptr<const Dvec> dket,shared_ptr<const Determinants> o) const168 FCI::compute_rdm12_av_from_dvec(shared_ptr<const Dvec> dbra, shared_ptr<const Dvec> dket, shared_ptr<const Determinants> o) const {
169
170 if (o != nullptr) {
171 dbra->set_det(o);
172 dket->set_det(o);
173 }
174
175 auto rdm1 = make_shared<RDM<1>>(norb_);
176 auto rdm2 = make_shared<RDM<2>>(norb_);
177
178 assert(dbra->ij() == dket->ij() && dbra->det() == dket->det());
179
180 for (int i = 0; i != dbra->ij(); ++i) {
181 shared_ptr<RDM<1>> r1;
182 shared_ptr<RDM<2>> r2;
183 tie(r1, r2) = compute_rdm12_from_civec(dbra->data(i), dket->data(i));
184 rdm1->ax_plus_y(weight_[i], r1);
185 rdm2->ax_plus_y(weight_[i], r2);
186 }
187
188 if (o != nullptr) {
189 dbra->set_det(det_);
190 dket->set_det(det_);
191 }
192
193 return tie(rdm1, rdm2);
194 }
195
196 // computes 3 and 4RDM
rdm34(const int ist,const int jst) const197 tuple<shared_ptr<RDM<3>>, shared_ptr<RDM<4>>> FCI::rdm34(const int ist, const int jst) const {
198 auto rdm3 = make_shared<RDM<3>>(norb_);
199 auto rdm4 = make_shared<RDM<4>>(norb_);
200
201 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
202 cc_->set_det(detex);
203
204 shared_ptr<Civec> cbra = cc_->data(ist);
205 shared_ptr<Civec> cket = cc_->data(jst);
206
207 // first make <I|E_ij|0>
208 auto dbra = make_shared<Dvec>(cbra->det(), norb_*norb_);
209 sigma_2a1(cbra, dbra);
210 sigma_2a2(cbra, dbra);
211
212 shared_ptr<Dvec> dket = dbra;
213 if (cbra != cket) {
214 dket = dbra->clone();
215 sigma_2a1(cket, dket);
216 sigma_2a2(cket, dket);
217 }
218
219 // second make <J|E_kl|I><I|E_ij|0> - delta_il <J|E_kj|0>
220 auto make_evec_half = [this](shared_ptr<Dvec> d, shared_ptr<Matrix> e, const int dsize, const int offset) {
221 const int norb2 = norb_ * norb_;
222 const int lena = cc_->det()->lena();
223 const int lenb = cc_->det()->lenb();
224 int no = 0;
225
226 for (int ij = 0; ij != norb2; ++ij) {
227 const int j = ij/norb_;
228 const int i = ij-j*norb_;
229
230 for (int kl = ij; kl != norb2; ++kl) {
231 const int l = kl/norb_;
232 const int k = kl-l*norb_;
233
234 for (auto& iter : cc_->det()->phia(k,l)) {
235 size_t iaJ = iter.source;
236 size_t iaI = iter.target;
237 double sign = static_cast<double>(iter.sign);
238 for (size_t ib = 0; ib != lenb; ++ib) {
239 size_t iI = ib + iaI*lenb;
240 size_t iJ = ib + iaJ*lenb;
241 if ((iJ - offset) < dsize && iJ >= offset)
242 e->element(iJ-offset, no) += sign * d->data(ij)->data(iI);
243 }
244 }
245
246 for (size_t ia = 0; ia != lena; ++ia) {
247 for (auto& iter : cc_->det()->phib(k,l)) {
248 size_t ibJ = iter.source;
249 size_t ibI = iter.target;
250 double sign = static_cast<double>(iter.sign);
251 size_t iI = ibI + ia*lenb;
252 size_t iJ = ibJ + ia*lenb;
253 if ((iJ - offset) < dsize && iJ >= offset)
254 e->element(iJ-offset, no) += sign * d->data(ij)->data(iI);
255 }
256 }
257
258 if (i == l) {
259 const int kj = k+j*norb_;
260 for (size_t iJ = offset; iJ != offset+dsize; ++iJ) {
261 e->element(iJ-offset, no) -= d->data(kj)->data(iJ);
262 }
263 }
264 ++no;
265 }
266 }
267 };
268
269 // RDM3, RDM4 construction is multipassed and parallelized:
270 // (1) When ndet > 10000, (ndet < 10000 -> too small, almost no gain)
271 // and (2) When we have processes more than one
272 // OR (3) When the number of words in <I|E_ij,kl|0> is larger than (10,10) case (635,040,000)
273 const size_t ndet = cbra->det()->size();
274 const size_t norb2 = norb_ * norb_;
275 const size_t ijmax = 635040001 * 2;
276 const size_t ijnum = ndet * norb2 * norb2;
277 const size_t npass = ((mpi__->size() * 2 > ((ijnum-1)/ijmax + 1)) && (mpi__->size() != 1) && ndet > 10000) ? mpi__->size() * 2 : (ijnum-1) / ijmax + 1;
278 const size_t nsize = (ndet-1) / npass + 1;
279 Timer timer;
280 if (npass > 1) {
281 cout << " * Third and fourth order RDM (" << setw(2) << ist + 1 << "," << setw(2) << jst + 1 << ") evaluation" << endl;
282 cout << " will be done with " << npass << " passes" << endl;
283 }
284
285 rdm3->zero();
286 rdm4->zero();
287
288 // multipassing through {I}
289 for (size_t ipass = 0; ipass != npass; ++ipass) {
290 if (ipass % mpi__->size() != mpi__->rank()) continue;
291
292 const size_t ioffset = ipass * nsize;
293 const size_t isize = (ipass != (npass - 1)) ? nsize : ndet - ioffset;
294 const size_t halfsize = norb2 * (norb2 + 1) / 2;
295 auto eket_half = make_shared<Matrix>(isize, halfsize, /*local=*/true);
296 make_evec_half(dket, eket_half, isize, ioffset);
297
298 auto dbram = make_shared<Matrix>(isize, norb2, /*local=*/true);
299 for (size_t ij = 0; ij != norb2; ++ij)
300 copy_n(&(dbra->data(ij)->data(ioffset)), isize, dbram->element_ptr(0, ij));
301
302 // put in third-order RDM: <0|E_mn|I><I|E_ij < kl|0>
303 auto tmp3 = make_shared<Matrix>(*dbram % *eket_half);
304 auto tmp3_full = make_shared<Matrix>(norb2, norb2 * norb2, /*local=*/true);
305
306 for (size_t mn = 0; mn != norb2; ++mn) {
307 int no = 0;
308 for (size_t ij = 0; ij != norb2; ++ij) {
309 for (size_t kl = ij; kl != norb2; ++kl) {
310 int ijkl = ij + kl*norb2;
311 int klij = kl + ij*norb2;
312 tmp3_full->element(mn, ijkl) = tmp3->element(mn, no);
313 tmp3_full->element(mn, klij) = tmp3->element(mn, no);
314 ++no;
315 }
316 }
317 }
318 sort_indices<1,0,2,1,1,1,1>(tmp3_full->data(), rdm3->data(), norb_, norb_, norb2*norb2);
319
320 // put in fourth-order RDM: <0|E_ij > kl|I><I|E_mn < op|0>
321 shared_ptr<Matrix> ebra_half = eket_half;
322 if (cbra != cket) {
323 ebra_half = eket_half->clone();
324 make_evec_half(dbra, ebra_half, isize, ioffset);
325 }
326 auto tmp4 = make_shared<Matrix>(*ebra_half % *eket_half);
327 auto tmp4_full = make_shared<Matrix>(norb2 * norb2, norb2 * norb2, /*local=*/true);
328
329 {
330 int nklij = 0;
331 for (size_t kl = 0; kl != norb2; ++kl) {
332 for (size_t ij = kl; ij != norb2; ++ij) {
333 int nmnop = 0;
334 int klij = kl+ij*norb2;
335 int ijkl = ij+kl*norb2;
336 for (size_t mn = 0; mn != norb2; ++mn) {
337 for (size_t op = mn; op != norb2; ++op) {
338 int mnop = mn + op*norb2;
339 int opmn = op + mn*norb2;
340 tmp4_full->element(ijkl, mnop) = tmp4->element(nklij, nmnop);
341 tmp4_full->element(klij, mnop) = tmp4->element(nklij, nmnop);
342 tmp4_full->element(ijkl, opmn) = tmp4->element(nklij, nmnop);
343 tmp4_full->element(klij, opmn) = tmp4->element(nklij, nmnop);
344 ++nmnop;
345 }
346 }
347 ++nklij;
348 }
349 }
350 }
351 sort_indices<1,0,3,2,4,1,1,1,1>(tmp4_full->data(), rdm4->data(), norb_, norb_, norb_, norb_, norb2*norb2);
352 }
353
354 rdm3->allreduce();
355 rdm4->allreduce();
356
357 if (npass > 1) {
358 timer.tick_print("RDM evaluation (multipassing)");
359 }
360
361 // The remaining terms can be evaluated without multipassing
362 {
363 // then perform Eq. 49 of JCP 89 5803 (Werner's MRCI paper)
364 // we assume that rdm2_[ist] is set
365 for (int i0 = 0; i0 != norb_; ++i0)
366 for (int i1 = 0; i1 != norb_; ++i1)
367 for (int i2 = 0; i2 != norb_; ++i2)
368 for (int i3 = 0; i3 != norb_; ++i3) {
369 blas::ax_plus_y_n(-1.0, rdm2_->at(ist, jst)->element_ptr(0, i2, i1, i0), norb_, rdm3->element_ptr(0, i3, i3, i2, i1, i0));
370 blas::ax_plus_y_n(-1.0, rdm2_->at(ist, jst)->element_ptr(0, i0, i3, i2), norb_, rdm3->element_ptr(0, i1, i3, i2, i1, i0));
371 }
372 }
373
374 {
375 for (int l = 0; l != norb_; ++l)
376 for (int k = 0; k != norb_; ++k)
377 for (int j = 0; j != norb_; ++j)
378 for (int b = 0; b != norb_; ++b) {
379 blas::ax_plus_y_n(-1.0, rdm3->element_ptr(0,0,0,k,b,l), norb_*norb_*norb_, rdm4->element_ptr(0,0,0,j,j,k,b,l));
380 blas::ax_plus_y_n(-1.0, rdm3->element_ptr(0,0,0,l,b,k), norb_*norb_*norb_, rdm4->element_ptr(0,0,0,j,b,k,j,l));
381 for (int i = 0; i != norb_; ++i) {
382 blas::ax_plus_y_n(-1.0, rdm2_->at(ist, jst)->element_ptr(0,k,b,l), norb_, rdm4->element_ptr(0,i,b,j,i,k,j,l));
383 blas::ax_plus_y_n(-1.0, rdm2_->at(ist, jst)->element_ptr(0,l,b,k), norb_, rdm4->element_ptr(0,i,b,j,j,k,i,l));
384 for (int d = 0; d != norb_; ++d) {
385 blas::ax_plus_y_n(-1.0, rdm3->element_ptr(0,k,b,j,d,l), norb_, rdm4->element_ptr(0,i,b,j,i,k,d,l));
386 blas::ax_plus_y_n(-1.0, rdm3->element_ptr(0,l,b,j,d,k), norb_, rdm4->element_ptr(0,i,b,j,d,k,i,l));
387 }
388 }
389 }
390 }
391
392 cc_->set_det(det_);
393
394 return make_tuple(rdm3, rdm4);
395 }
396
397 // computes 3 and Fock-weighted 4RDM
rdm34f(const int ist,const int jst,shared_ptr<const Matrix> fock) const398 tuple<shared_ptr<RDM<3>>, shared_ptr<RDM<3>>> FCI::rdm34f(const int ist, const int jst, shared_ptr<const Matrix> fock) const {
399 auto rdm3 = make_shared<RDM<3>>(norb_);
400 auto rdm4f = make_shared<RDM<3>>(norb_);
401
402 auto detex = make_shared<Determinants>(norb_, nelea_, neleb_, false, /*mute=*/true);
403 cc_->set_det(detex);
404
405 shared_ptr<Civec> cbra = cc_->data(ist);
406 shared_ptr<Civec> cket = cc_->data(jst);
407
408 // first make <I|E_ij|0>
409 auto dbra = make_shared<Dvec>(cbra->det(), norb_*norb_);
410 sigma_2a1(cbra, dbra);
411 sigma_2a2(cbra, dbra);
412
413 shared_ptr<Dvec> dket = dbra;
414 if (cbra != cket) {
415 dket = dbra->clone();
416 sigma_2a1(cket, dket);
417 sigma_2a2(cket, dket);
418 }
419
420 // second make <J|E_kl|I><I|E_ij|0> - delta_il <J|E_kj|0>
421 auto make_evec_half = [this](shared_ptr<Dvec> d, shared_ptr<Matrix> e, const int dsize, const int offset) {
422 const int norb2 = norb_ * norb_;
423 const int lena = cc_->det()->lena();
424 const int lenb = cc_->det()->lenb();
425 int no = 0;
426
427 for (int ij = 0; ij != norb2; ++ij) {
428 const int j = ij/norb_;
429 const int i = ij-j*norb_;
430
431 for (int kl = ij; kl != norb2; ++kl) {
432 const int l = kl/norb_;
433 const int k = kl-l*norb_;
434
435 for (auto& iter : cc_->det()->phia(k,l)) {
436 size_t iaJ = iter.source;
437 size_t iaI = iter.target;
438 double sign = static_cast<double>(iter.sign);
439 for (size_t ib = 0; ib != lenb; ++ib) {
440 size_t iI = ib + iaI*lenb;
441 size_t iJ = ib + iaJ*lenb;
442 if ((iJ - offset) < dsize && iJ >= offset)
443 e->element(iJ-offset, no) += sign * d->data(ij)->data(iI);
444 }
445 }
446
447 for (size_t ia = 0; ia != lena; ++ia) {
448 for (auto& iter : cc_->det()->phib(k,l)) {
449 size_t ibJ = iter.source;
450 size_t ibI = iter.target;
451 double sign = static_cast<double>(iter.sign);
452 size_t iI = ibI + ia*lenb;
453 size_t iJ = ibJ + ia*lenb;
454 if ((iJ - offset) < dsize && iJ >= offset)
455 e->element(iJ-offset, no) += sign * d->data(ij)->data(iI);
456 }
457 }
458
459 if (i == l) {
460 const int kj = k+j*norb_;
461 for (size_t iJ = offset; iJ != offset+dsize; ++iJ) {
462 e->element(iJ-offset, no) -= d->data(kj)->data(iJ);
463 }
464 }
465 ++no;
466 }
467 }
468 };
469
470 // RDM3, RDM4 construction is multipassed and parallelized:
471 // (1) When ndet > 10000, (ndet < 10000 -> too small, almost no gain)
472 // and (2) When we have processes more than one
473 // OR (3) When the number of words in <I|E_ij,kl|0> is larger than (10,10) case (635,040,000)
474 const size_t ndet = cbra->det()->size();
475 const size_t norb2 = norb_ * norb_;
476 const size_t ijmax = 635040001 * 2;
477 const size_t ijnum = ndet * norb2 * norb2;
478 const size_t npass = ((mpi__->size() * 2 > ((ijnum-1)/ijmax + 1)) && (mpi__->size() != 1) && ndet > 10000) ? mpi__->size() * 2 : (ijnum-1) / ijmax + 1;
479 const size_t nsize = (ndet-1) / npass + 1;
480 Timer timer;
481 if (npass > 1) {
482 cout << " * Third and Fock-weighted fourth order RDM (" << setw(2) << ist + 1 << "," << setw(2) << jst + 1 << ") evaluation" << endl;
483 cout << " will be done with " << npass << " passes" << endl;
484 }
485
486 rdm3->zero();
487 rdm4f->zero();
488
489 // multipassing through {I}
490 for (size_t ipass = 0; ipass != npass; ++ipass) {
491 if (ipass % mpi__->size() != mpi__->rank()) continue;
492
493 const size_t ioffset = ipass * nsize;
494 const size_t isize = (ipass != (npass - 1)) ? nsize : ndet - ioffset;
495 const size_t halfsize = norb2 * (norb2 + 1) / 2;
496 auto eket_half = make_shared<Matrix>(isize, halfsize, /*local=*/true);
497 make_evec_half(dket, eket_half, isize, ioffset);
498 shared_ptr<Matrix> ebra_half = eket_half;
499 if (cbra != cket) {
500 ebra_half = eket_half->clone();
501 make_evec_half(dbra, ebra_half, isize, ioffset);
502 }
503
504 auto feket = make_shared<Matrix>(isize, norb2, /*local=*/true);
505 feket->zero();
506 {
507 auto eket_full = make_shared<Matrix>(isize, norb2 * norb2, /*local=*/true);
508 size_t no = 0;
509 for (size_t kl = 0; kl != norb2; ++kl)
510 for (size_t ij = kl; ij != norb2; ++ij) {
511 size_t ijkl = ij + kl*norb2;
512 size_t klij = kl + ij*norb2;
513 for (size_t iI = 0; iI != isize; ++iI) {
514 eket_full->element(iI, ijkl) = eket_half->element(iI, no);
515 eket_full->element(iI, klij) = eket_half->element(iI, no);
516 }
517 ++no;
518 }
519 dgemv_("N", isize*norb_*norb_, norb_*norb_, 1.0, eket_full->data(), isize*norb_*norb_, fock->data(), 1, 0.0, feket->data(), 1);
520 }
521
522 auto dbram = make_shared<Matrix>(isize, norb2, /*local=*/true);
523 for (size_t ij = 0; ij != norb2; ++ij)
524 copy_n(&(dbra->data(ij)->data(ioffset)), isize, dbram->element_ptr(0, ij));
525
526 // put in third-order RDM: <0|E_mn|I><I|E_ij < kl|0>
527 auto tmp3 = make_shared<Matrix>(*dbram % *eket_half);
528 auto tmp3_full = make_shared<Matrix>(norb2, norb2 * norb2, /*local=*/true);
529 // <0|E_ij > kl|I>[I|mn|0]
530 auto tmp4 = make_shared<Matrix>(*ebra_half % *feket);
531 auto tmp4_full = make_shared<Matrix>(norb2 * norb2, norb2, /*local=*/true);
532
533 for (size_t mn = 0; mn != norb2; ++mn) {
534 size_t no = 0;
535 for (size_t kl = 0; kl != norb2; ++kl) {
536 for (size_t ij = kl; ij != norb2; ++ij) {
537 size_t ijkl = ij + kl*norb2;
538 size_t klij = kl + ij*norb2;
539 tmp3_full->element(mn, ijkl) = tmp3->element(mn, no);
540 tmp3_full->element(mn, klij) = tmp3->element(mn, no);
541 tmp4_full->element(ijkl, mn) = tmp4->element(no, mn);
542 tmp4_full->element(klij, mn) = tmp4->element(no, mn);
543 ++no;
544 }
545 }
546 }
547 sort_indices<1,0,2,1,1,1,1>(tmp3_full->data(), rdm3->data(), norb_, norb_, norb2*norb2);
548 sort_indices<1,0,3,2,4,1,1,1,1>(tmp4_full->data(), rdm4f->data(), norb_, norb_, norb_, norb_, norb2);
549 }
550
551 rdm3->allreduce();
552 rdm4f->allreduce();
553
554 if (npass > 1) {
555 timer.tick_print("RDM evaluation (multipassing)");
556 }
557
558 {
559 // then perform Eq. 49 of JCP 89 5803 (Werner's MRCI paper)
560 // we assume that rdm2_[ist] is set
561 for (int i0 = 0; i0 != norb_; ++i0)
562 for (int i1 = 0; i1 != norb_; ++i1)
563 for (int i2 = 0; i2 != norb_; ++i2)
564 for (int i3 = 0; i3 != norb_; ++i3) {
565 blas::ax_plus_y_n(-1.0, rdm2_->at(ist, jst)->element_ptr(0, i2, i1, i0), norb_, rdm3->element_ptr(0, i3, i3, i2, i1, i0));
566 blas::ax_plus_y_n(-1.0, rdm2_->at(ist, jst)->element_ptr(0, i0, i3, i2), norb_, rdm3->element_ptr(0, i1, i3, i2, i1, i0));
567 }
568 }
569
570 {
571 // [0|E_ip,jl|0]
572 auto frdm3 = make_shared<RDM<2>>(norb_);
573 auto rdm3view = group(group(*rdm3, 4,6), 0,4);
574 auto frdm3view = group(*frdm3, 0,4);
575 contract(1.0, rdm3view, {0,1}, group(*fock, 0,2), {1}, 0.0, frdm3view, {0});
576
577 // <0|E_ip,jn|0>f_nl (in this order)
578 auto prdm2 = make_shared<RDM<2>>(norb_);
579 auto prdm2v = group(*prdm2, 0,3);
580 contract(1.0, group(*rdm2_->at(ist, jst), 0,3), {0,1}, *fock, {1,2}, 0.0, prdm2v, {0,2});
581
582 // <0|E_ik,jl,on|0>f_np (in this order)
583 auto prdm3 = make_shared<RDM<3>>(norb_);
584 auto prdm3v = group(*prdm3, 0,5);
585 contract(1.0, group(*rdm3, 0,5), {0,1}, *fock, {1,2}, 0.0, prdm3v, {0,2});
586
587 sort_indices<2,0,1,1,1,-1,1>(prdm3->data(), rdm4f->data(), norb_*norb_, norb_*norb_, norb_*norb_);
588 sort_indices<0,2,1,1,1,-1,1>(prdm3->data(), rdm4f->data(), norb_*norb_, norb_*norb_, norb_*norb_);
589
590 auto prdm2t = prdm2->clone();
591 sort_indices<1,0,0,1,1,1>(prdm2->data(), prdm2t->data(), norb_*norb_, norb_*norb_);
592 for (int p = 0; p != norb_; ++p)
593 for (int l = 0; l != norb_; ++l) {
594 blas::ax_plus_y_n(-1.0, prdm2t->element_ptr(0,0,0,p), norb_*norb_*norb_, rdm4f->element_ptr(0,0,0,l,l,p));
595 blas::ax_plus_y_n(-1.0, frdm3->element_ptr(0,0,0,p), norb_*norb_*norb_, rdm4f->element_ptr(0,0,0,l,l,p));
596 for (int k = 0; k != norb_; ++k)
597 for (int j = 0; j != norb_; ++j) {
598 blas::ax_plus_y_n(-1.0, prdm2->element_ptr(0,p,j,l), norb_, rdm4f->element_ptr(0,k,j,l,k,p));
599 blas::ax_plus_y_n(-1.0, frdm3->element_ptr(0,p,j,l), norb_, rdm4f->element_ptr(0,k,j,l,k,p));
600 }
601 }
602 }
603
604 cc_->set_det(det_);
605
606 return make_tuple(rdm3, rdm4f);
607 }
608