1 //
2 // BAGEL - Parallel electron correlation program.
3 // Filename: box.cc
4 // Copyright (C) 2016 Toru Shiozaki
5 //
6 // Author: Hai-Anh Le <anh@u.northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // The BAGEL package is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 3, or (at your option)
14 // any later version.
15 //
16 // The BAGEL package is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the BAGEL package; see COPYING. If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25
26
27 #include <mutex>
28 #include <src/scf/fmm/box.h>
29 #include <src/integral/os/multipolebatch.h>
30 #include <src/integral/rys/eribatch.h>
31 #include <src/util/taskqueue.h>
32 #include <src/util/math/factorial.h>
33 #include <src/util/math/legendre.h>
34
35 using namespace bagel;
36 using namespace std;
37
38 const static Legendre plm;
39 const static Factorial f;
40
init()41 void Box::init() {
42
43 nchild_ = child_.size();
44 centre_ = {{0, 0, 0}};
45 extent_ = 0.0;
46 nsp_ = 0;
47 for (auto& i : sp_) {
48 if (i->schwarz() < schwarz_thresh_) continue;
49 ++nsp_;
50 }
51
52 if (nchild_ == 0) {
53 for (auto& i : sp_) {
54 if (i->schwarz() < schwarz_thresh_) continue;
55 for (int j = 0; j != 3; ++j) centre_[j] += i->centre(j);
56 }
57 centre_[0] /= nsp_;
58 centre_[1] /= nsp_;
59 centre_[2] /= nsp_;
60 for (auto& i : sp_) {
61 if (i->schwarz() < schwarz_thresh_) continue;
62 double rad = 0;
63 for (int j = 0; j != 3; ++j) rad += pow(i->centre(j)-centre_[j], 2.0);
64 const double ei = sqrt(rad) + i->extent();
65 assert(ei > 0);
66 extent_ = max(extent_, ei);
67 }
68 } else {
69 for (int n = 0; n != nchild_; ++n) {
70 shared_ptr<const Box> i = child_[n].lock();
71 for (int j = 0; j != 3; ++j) centre_[j] += i->centre(j);
72 }
73 centre_[0] /= nchild_;
74 centre_[1] /= nchild_;
75 centre_[2] /= nchild_;
76 for (int n = 0; n != nchild_; ++n) {
77 shared_ptr<const Box> i = child_[n].lock();
78 double rad = 0.0;
79 for (int j = 0; j != 3; ++j) rad += pow(i->centre(j)-centre_[j], 2.0);
80 const double ei = sqrt(rad) + i->extent();
81 assert(ei > 0);
82 extent_ = max(extent_, ei);
83 }
84 }
85
86 ws_ = static_cast<int>(ceil(extent_/boxsize_));
87 nmult_ = (lmax_ + 1) * (lmax_ + 1);
88 olm_ = make_shared<ZVectorB>(nmult_);
89 mlm_ = make_shared<ZVectorB>(nmult_);
90
91 nshell0_ = 0;
92 int cnt = 0;
93 for (auto& i : sp_) {
94 auto it = shell0_.insert(make_pair(i->shell(0), make_tuple(nshell0_, i->offset(0), cnt)));
95 if (it.second) {
96 nshell0_ += i->shell(0)->nbasis();
97 ++cnt;
98 }
99 }
100 }
101
102
insert_sp(const vector<shared_ptr<const ShellPair>> & sp)103 void Box::insert_sp(const vector<shared_ptr<const ShellPair>>& sp) {
104
105 const int nsp = sp_.size();
106 sp_.resize(nsp + sp.size());
107 for (int i = 0; i != sp.size(); ++i)
108 sp_[i + nsp] = sp[i];
109
110 }
111
112
insert_child(weak_ptr<const Box> c)113 void Box::insert_child(weak_ptr<const Box> c) {
114
115 const int nchild = child_.size();
116 shared_ptr<const Box> child = c.lock();
117 if (child) {
118 child_.resize(nchild + 1);
119 child_[nchild] = child;
120 }
121 }
122
123
insert_parent(weak_ptr<const Box> p)124 void Box::insert_parent(weak_ptr<const Box> p) {
125
126 parent_ = p;
127 }
128
129
get_neigh(const vector<weak_ptr<Box>> & box,const double ws)130 void Box::get_neigh(const vector<weak_ptr<Box>>& box, const double ws) {
131
132 shared_ptr<const Box> box0 = box[0].lock();
133 if ((box.empty()) || (box0->rank() != rank_)) return;
134
135 neigh_.resize(box.size());
136 nonneigh_.resize(box.size());
137 int nn = 0;
138 int nnn = 0;
139 for (int i = 0; i != box.size(); ++i) {
140 shared_ptr<const Box> b = box[i].lock();
141 if (is_neigh(b, ws)) {
142 neigh_[nn] = b;
143 ++nn;
144 } else {
145 nonneigh_[nnn] = b;
146 ++nnn;
147 }
148 }
149 neigh_.resize(nn);
150 nonneigh_.resize(nnn);
151 nneigh_ = neigh_.size();
152 }
153
154
is_neigh(weak_ptr<const Box> b,const double ws) const155 bool Box::is_neigh(weak_ptr<const Box> b, const double ws) const {
156
157 shared_ptr<const Box> box = b.lock();
158 if (nchild_ == 0) {
159 double rr = 0;
160 for (int i = 0; i != 3; ++i) rr += pow(centre_[i] - box->centre(i), 2);
161 return (sqrt(rr) <= (1.0+ws)*(extent_ + box->extent()));
162 } else {
163 for (int n0 = 0; n0 != nchild_; ++n0) {
164 shared_ptr<const Box> i = child_[n0].lock();
165 for (int n1 = 0; n1 != box->child().size(); ++n1) {
166 shared_ptr<const Box> j = box->child()[n1].lock();
167 if (i->is_neigh(j, ws)) return true;
168 }
169 }
170 return false;
171 }
172 }
173
get_inter(const vector<weak_ptr<Box>> & box,const double ws)174 void Box::get_inter(const vector<weak_ptr<Box>>& box, const double ws) {
175
176 shared_ptr<const Box> box0 = box[0].lock();
177 if ((box.empty()) || (box0->rank() != rank_)) return;
178
179 shared_ptr<const Box> parent = parent_.lock();
180 inter_.resize(box.size());
181 int ni = 0;
182 for (int i = 0; i != box.size(); ++i) {
183 shared_ptr<const Box> b = box[i].lock();
184 if ((!is_neigh(b, ws)) && (!parent || parent->is_neigh(b->parent(), ws))) {
185 inter_[ni] = b;
186 ++ni;
187 }
188 }
189 inter_.resize(ni);
190 ninter_ = inter_.size();
191 }
192
193
sort_sp()194 void Box::sort_sp() {
195
196 if (nchild_ == 0) return;
197
198 assert(nchild_ > 0);
199 vector<shared_ptr<const ShellPair>> newsp;
200 for (int n = 0; n != nchild_; ++n) {
201 shared_ptr<const Box> c = child_[n].lock();
202 const vector<shared_ptr<const ShellPair>>& csp = c->sp();
203 newsp.insert(newsp.end(), csp.begin(), csp.end());
204 }
205 assert(newsp.size() == sp_.size());
206 sp_ = newsp;
207 }
208
209
compute_M2M(shared_ptr<const Matrix> density)210 void Box::compute_M2M(shared_ptr<const Matrix> density) {
211
212 olm_->fill(0.0);
213
214 if (nchild_ == 0) { // leaf
215 TaskQueue<function<void(void)>> tasks(nsp_);
216 mutex jmutex;
217 for (auto& v : sp_) {
218 if (v->schwarz() < schwarz_thresh_) continue;
219
220 tasks.emplace_back(
221 [this, &v, &density, &jmutex]() {
222 auto olm = make_shared<ZVectorB>(nmult_);
223 MultipoleBatch mpole(v->shells(), centre_, lmax_);
224 mpole.compute();
225 const int dimb0 = v->shell(0)->nbasis();
226 const int dimb1 = v->shell(1)->nbasis();
227
228 for (int k = 0; k != mpole.num_blocks(); ++k) {
229 size_t cnt = 0;
230 for (int i = v->offset(1); i != dimb1 + v->offset(1); ++i) {
231 (*olm)(k) += conj(blas::dot_product_noconj(mpole.data() + mpole.size_block()*k + cnt, dimb0, density->element_ptr(v->offset(0), i)));
232 cnt += dimb0;
233 }
234 }
235
236 lock_guard<mutex> lock(jmutex);
237 blas::ax_plus_y_n(1.0, olm->data(), nmult_, olm_->data());
238 }
239 );
240 }
241 tasks.compute();
242 } else { // shift children's multipoles
243 for (int n = 0; n != nchild_; ++n) {
244 shared_ptr<const Box> c = child_[n].lock();
245 const array<double, 3> r12 = {{c->centre(0) - centre_[0], c->centre(1) - centre_[1], c->centre(2) - centre_[2]}};
246 auto cmultipole = make_shared<ZMatrix>(1, nmult_);
247 copy_n(c->olm()->data(), nmult_, cmultipole->data());
248 shared_ptr<const ZMatrix> tmp = shift_multipolesX(lmax_, cmultipole, r12);
249 auto smoment = make_shared<ZVectorB>(nmult_);
250 copy_n(tmp->data(), nmult_, smoment->data());
251 blas::ax_plus_y_n(1.0, smoment->data(), nmult_, olm_->data());
252 }
253 }
254 }
255
256
257 //|su) C_ui C_sj
compute_M2M_X(shared_ptr<const Matrix> ocoeff_sj,shared_ptr<const Matrix> ocoeff_ui)258 void Box::compute_M2M_X(shared_ptr<const Matrix> ocoeff_sj, shared_ptr<const Matrix> ocoeff_ui) {
259
260 const int nmult_k = (lmax_k_+1)*(lmax_k_+1);
261 olm_ndim_ = ocoeff_sj->mdim(); olm_mdim_ = ocoeff_ui->mdim();
262 olm_ji_ = make_shared<ZMatrix>(olm_ndim_*olm_mdim_, nmult_k);
263
264 if (nchild_ == 0) { // leaf
265 ZMatrix interm(nshell0_, ocoeff_ui->mdim()*nmult_k, true);
266 TaskQueue<function<void(void)>> tasks(nsp_);
267 vector<mutex> mmutex(shell0_.size());
268
269 for (auto& v : sp_) {
270 if (v->schwarz() < schwarz_thresh_) continue;
271
272 tasks.emplace_back(
273 [this, v, ocoeff_ui, &mmutex, nmult_k, &interm]() {
274 MultipoleBatch olm_su(v->shells(), centre_, lmax_k_);
275 olm_su.compute();
276 const int dimb0 = v->shell(0)->nbasis();
277 const int dimb1 = v->shell(1)->nbasis();
278 const ZMatrix zui(*ocoeff_ui->cut(v->offset(1), v->offset(1)+dimb1), 1.0);
279 unique_ptr<complex<double>[]> tmp(new complex<double>[dimb0*ocoeff_ui->mdim()*nmult_k]);
280 for (int k = 0; k != nmult_k; ++k)
281 zgemm_("N", "N", dimb0, ocoeff_ui->mdim(), dimb1, 1.0, olm_su.data() + olm_su.size_block()*k, dimb0, zui.data(), dimb1,
282 0.0, tmp.get()+dimb0*ocoeff_ui->mdim()*k, dimb0);
283 lock_guard<mutex> lock(mmutex[get<2>(shell0_.at(v->shell(0)))]);
284 const int off = get<0>(shell0_.at(v->shell(0)));
285 interm.add_block(1.0, off, 0, dimb0, ocoeff_ui->mdim()*nmult_k, tmp.get());
286 }
287 );
288 }
289 tasks.compute();
290 blas::conj_n(interm.data(), interm.size());
291
292 ZMatrix icoeff(nshell0_, ocoeff_sj->mdim(), true);
293 for (auto& i : shell0_) {
294 const int local_offset = get<0>(i.second);
295 const int all_offset = get<1>(i.second);
296 const int size = i.first->nbasis();
297 for (int m = 0; m != ocoeff_sj->mdim(); ++m)
298 for (int n = 0; n != size; ++n)
299 icoeff(n+local_offset, m) = ocoeff_sj->element(n+all_offset, m);
300 }
301 zgemm_("C", "N", icoeff.mdim(), interm.mdim(), nshell0_, 1.0, icoeff.data(), nshell0_, interm.data(), nshell0_, 0.0, olm_ji_->data(), icoeff.mdim());
302
303 } else { // shift children's multipoles
304 for (int n = 0; n != nchild_; ++n) {
305 shared_ptr<const Box> c = child_[n].lock();
306 const array<double, 3> r12 = {{c->centre(0) - centre_[0], c->centre(1) - centre_[1], c->centre(2) - centre_[2]}};
307 shared_ptr<const ZMatrix> smoment = shift_multipolesX(lmax_k_, c->olm_ji(), r12);
308 blas::ax_plus_y_n(1.0, smoment->data(), olm_ji_->size(), olm_ji_->data());
309 }
310 }
311 }
312
313
compute_L2L_X()314 void Box::compute_L2L_X() {
315
316 // from parent
317 shared_ptr<const Box> parent = parent_.lock();
318 if (parent) {
319 const array<double, 3> r12 = {{centre_[0] - parent->centre(0), centre_[1] - parent->centre(1), centre_[2] - parent->centre(2)}};
320 shared_ptr<const ZMatrix> slocal = shift_localLX(lmax_k_, parent->mlm_ji(), r12);
321 blas::ax_plus_y_n(1.0, slocal->data(), mlm_ji_->size(), mlm_ji_->data());
322 }
323 }
324
325
compute_L2L()326 void Box::compute_L2L() {
327
328 // from parent
329 shared_ptr<const Box> parent = parent_.lock();
330 if (parent) {
331 const array<double, 3> r12 = {{centre_[0] - parent->centre(0), centre_[1] - parent->centre(1), centre_[2] - parent->centre(2)}};
332 auto plocalJ = make_shared<ZMatrix>(1, nmult_);
333 copy_n(parent->mlm()->data(), nmult_, plocalJ->data());
334 shared_ptr<const ZMatrix> tmp = shift_localLX(lmax_, plocalJ, r12);
335 auto slocal = make_shared<ZVectorB>(nmult_);
336 copy_n(tmp->data(), nmult_, slocal->data());
337 blas::ax_plus_y_n(1.0, slocal->data(), nmult_, mlm_->data());
338 }
339 }
340
341
compute_M2L_X()342 void Box::compute_M2L_X() {
343
344 mlm_ji_ = olm_ji_->clone();
345
346 // from interaction list
347 for (int i = 0; i != ninter_; ++i) {
348 shared_ptr<const Box> it = inter_[i].lock();
349 const array<double, 3> r12 = {{centre_[0] - it->centre(0), centre_[1] - it->centre(1), centre_[2] - it->centre(2)}};
350 shared_ptr<const ZMatrix> slocal = shift_localMX(lmax_k_, it->olm_ji(), r12);
351 blas::ax_plus_y_n(1.0, slocal->data(), mlm_ji_->size(), mlm_ji_->data());
352 }
353 }
354
355
compute_M2L()356 void Box::compute_M2L() {
357
358 mlm_->fill(0.0);
359 // from interaction list
360 for (int i = 0; i != ninter_; ++i) {
361 shared_ptr<const Box> it = inter_[i].lock();
362 const array<double, 3> r12 = {{centre_[0] - it->centre(0), centre_[1] - it->centre(1), centre_[2] - it->centre(2)}};
363 auto imultipole = make_shared<ZMatrix>(1, nmult_);
364 copy_n(it->olm()->data(), nmult_, imultipole->data());
365 shared_ptr<const ZMatrix> tmp = shift_localMX(lmax_, imultipole, r12);
366 auto slocal = make_shared<ZVectorB>(nmult_);
367 copy_n(tmp->data(), nmult_, slocal->data());
368 blas::ax_plus_y_n(1.0, slocal->data(), nmult_, mlm_->data());
369 }
370 }
371
372
compute_Fock_nf(shared_ptr<const Matrix> density,shared_ptr<const VectorB> max_den) const373 shared_ptr<const Matrix> Box::compute_Fock_nf(shared_ptr<const Matrix> density, shared_ptr<const VectorB> max_den) const {
374
375 assert(nchild_ == 0);
376
377 auto out = make_shared<Matrix>(density->ndim(), density->mdim());
378 out->zero();
379
380 // NF: 4c integrals
381 const int shift = sizeof(int) * 4;
382 const int nsh = sqrt(max_den->size());
383 assert (nsh*nsh == max_den->size());
384
385 int ntask = 0;
386 for (auto& v01 : sp_) {
387 if (v01->schwarz() < schwarz_thresh_) continue;
388 const int i0 = v01->shell_ind(1);
389
390 const int i1 = v01->shell_ind(0);
391 if (i1 < i0) continue;
392
393 const int i01 = i0 * nsh + i1;
394 const double density_01 = (*max_den)(i01) * 4.0;
395
396 for (int n = 0; n != nneigh_; ++n) {
397 shared_ptr<const Box> neigh = neigh_[n].lock();
398 for (auto& v23 : neigh->sp()) {
399 if (v23->schwarz() < schwarz_thresh_) continue;
400 const int i2 = v23->shell_ind(1);
401 if (i2 < i0) continue;
402 const int i3 = v23->shell_ind(0);
403 if (i3 < i2) continue;
404 const int i23 = i2 * nsh + i3;
405 if (i23 < i01) continue;
406
407 const double density_23 = (*max_den)(i23) * 4.0;
408 const double density_02 = (*max_den)(i0 * nsh + i2);
409 const double density_03 = (*max_den)(i0 * nsh + i3);
410 const double density_12 = (*max_den)(i1 * nsh + i2);
411 const double density_13 = (*max_den)(i1 * nsh + i3);
412
413 const double mulfactor = max(max(max(density_01, density_02),
414 max(density_03, density_12)),
415 max(density_13, density_23));
416 const double integral_bound = mulfactor * v01->schwarz() * v23->schwarz();
417 const bool skip_schwarz = integral_bound < schwarz_thresh_;
418 if (skip_schwarz) continue;
419 ++ntask;
420 }
421 }
422 }
423
424 TaskQueue<function<void(void)>> tasks(ntask);
425 mutex jmutex;
426
427 for (auto& v01 : sp_) {
428 if (v01->schwarz() < schwarz_thresh_) continue;
429 shared_ptr<const Shell> b0 = v01->shell(1);
430 const int i0 = v01->shell_ind(1);
431 const int b0offset = v01->offset(1);
432 const int b0size = b0->nbasis();
433
434 shared_ptr<const Shell> b1 = v01->shell(0);
435 const int i1 = v01->shell_ind(0);
436 if (i1 < i0) continue;
437 const int b1offset = v01->offset(0);
438 const int b1size = b1->nbasis();
439
440 const int i01 = i0 * nsh + i1;
441 const double density_01 = (*max_den)(i01) * 4.0;
442
443 for (int n = 0; n != nneigh_; ++n) {
444 shared_ptr<const Box> neigh = neigh_[n].lock();
445 for (auto& v23 : neigh->sp()) {
446 if (v23->schwarz() < schwarz_thresh_) continue;
447 shared_ptr<const Shell> b2 = v23->shell(1);
448 const int i2 = v23->shell_ind(1);
449 if (i2 < i0) continue;
450 const int b2offset = v23->offset(1);
451 const int b2size = b2->nbasis();
452
453 shared_ptr<const Shell> b3 = v23->shell(0);
454 const int i3 = v23->shell_ind(0);
455 if (i3 < i2) continue;
456 const int b3offset = v23->offset(0);
457 const int b3size = b3->nbasis();
458
459 const int i23 = i2 * nsh + i3;
460 if (i23 < i01) continue;
461
462 const double density_23 = (*max_den)(i23) * 4.0;
463 const double density_02 = (*max_den)(i0 * nsh + i2);
464 const double density_03 = (*max_den)(i0 * nsh + i3);
465 const double density_12 = (*max_den)(i1 * nsh + i2);
466 const double density_13 = (*max_den)(i1 * nsh + i3);
467
468 const double mulfactor = max(max(max(density_01, density_02),
469 max(density_03, density_12)),
470 max(density_13, density_23));
471 const double integral_bound = mulfactor * v01->schwarz() * v23->schwarz();
472 const bool skip_schwarz = integral_bound < schwarz_thresh_;
473 if (skip_schwarz) continue;
474
475 array<shared_ptr<const Shell>,4> input = {{b3, b2, b1, b0}};
476
477 tasks.emplace_back(
478 [this, &out, &density, input, b0offset, i01, i23, b0size, b1offset, b1size, b2offset, b2size, b3offset, b3size, mulfactor, &jmutex]() {
479
480 ERIBatch eribatch(input, mulfactor);
481 eribatch.compute();
482 const double* eridata = eribatch.data();
483
484 const double* density_data = density->data();
485 lock_guard<mutex> lock(jmutex);
486 for (int j0 = b0offset; j0 != b0offset + b0size; ++j0) {
487 const int j0n = j0 * density->ndim();
488 for (int j1 = b1offset; j1 != b1offset + b1size; ++j1) {
489 if (j1 < j0) {
490 eridata += b2size * b3size;
491 continue;
492 }
493 const int j1n = j1 * density->ndim();
494 const unsigned int nj01 = (j0 << shift) + j1;
495 const double scale01 = (j0 == j1) ? 0.5 : 1.0;
496 for (int j2 = b2offset; j2 != b2offset + b2size; ++j2) {
497 const int j2n = j2 * density->ndim();
498 for (int j3 = b3offset; j3 != b3offset + b3size; ++j3, ++eridata) {
499 if (j3 < j2) continue;
500 const unsigned int nj23 = (j2 << shift) + j3;
501 if (nj23 < nj01 && i01 == i23) continue;
502 const double scale23 = (j2 == j3) ? 0.5 : 1.0;
503 const double scale = (nj01 == nj23) ? 0.25 : 0.5;
504
505 const double eri = *eridata;
506 const double intval = eri * scale * scale01 * scale23;
507 const double intval4 = 4.0 * intval;
508 out->element(j1, j0) += density_data[j2n + j3] * intval4;
509 out->element(j3, j2) += density_data[j0n + j1] * intval4;
510 out->element(max(j2, j0), min(j2,j0)) -= density_data[j1n + j3] * intval;
511 out->element(j3, j0) -= density_data[j1n + j2] * intval;
512 out->element(max(j1,j2), min(j1,j2)) -= density_data[j0n + j3] * intval;
513 out->element(max(j1,j3), min(j1,j3)) -= density_data[j0n + j2] * intval;
514 }
515 }
516 }
517 }
518
519 }
520 );
521
522 }
523 }
524 }
525
526 tasks.compute();
527
528 return out;
529 }
530
531
print_box() const532 void Box::print_box() const {
533 cout << "Box " << boxid_ << " Rank = " << rank_ << " *** nchild = " << nchild_ << " *** nsp = " << sp_.size()
534 << " *** nneigh = " << nneigh_ << " *** ninter = " << ninter_ << " *** extent = " << extent_
535 << " *** centre = " << setprecision(3) << centre_[0] << " " << centre_[1] << " " << centre_[2] << endl;
536
537 // cout << " *** Shell pairs at ***" << endl;
538 // for (int i = 0; i != sp_.size(); ++i)
539 // cout << setprecision(5) << sp(i)->centre(0) << " " << sp(i)->centre(1) << " " << sp(i)->centre(2) << endl;
540 }
541
542
compute_Fock_ff(shared_ptr<const Matrix> density) const543 shared_ptr<const Matrix> Box::compute_Fock_ff(shared_ptr<const Matrix> density) const {
544
545 assert(nchild_ == 0);
546 auto out = make_shared<ZMatrix>(density->ndim(), density->mdim());
547
548 // compute multipoles again - not efficient - for now
549 TaskQueue<function<void(void)>> tasks(nsp_);
550 mutex jmutex;
551 for (auto& v : sp_) {
552 if (v->schwarz() < schwarz_thresh_) continue;
553
554 tasks.emplace_back(
555 [this, &out, &v, &density, &jmutex]() {
556 MultipoleBatch mpole(v->shells(), centre_, lmax_);
557 mpole.compute();
558 const int dimb0 = v->shell(0)->nbasis();
559 const int dimb1 = v->shell(1)->nbasis();
560 vector<const complex<double>*> dat(nmult_);
561 for (int i = 0; i != nmult_; ++i)
562 dat[i] = mpole.data() + mpole.size_block()*i;
563
564 lock_guard<mutex> lock(jmutex);
565 for (int i = v->offset(1); i != dimb1 + v->offset(1); ++i)
566 for (int j = v->offset(0); j != dimb0 + v->offset(0); ++j)
567 for (int k = 0; k != mpole.num_blocks(); ++k)
568 out->element(i, j) += conj(*dat[k]++) * (*mlm_)(k);
569 }
570 );
571 }
572 tasks.compute();
573 assert(out->get_imag_part()->rms() < 1e-10);
574
575 return out->get_real_part();
576 }
577
578
compute_Fock_ff_K(shared_ptr<const Matrix> ocoeff_ti) const579 shared_ptr<const Matrix> Box::compute_Fock_ff_K(shared_ptr<const Matrix> ocoeff_ti) const {
580
581 assert(nchild_ == 0);
582 auto out = make_shared<Matrix>(ocoeff_ti->ndim(), olm_ndim_);
583 assert(ocoeff_ti->mdim() == olm_mdim_);
584 const int nmult_k = (lmax_k_+1)*(lmax_k_+1);
585 TaskQueue<function<void(void)>> tasks(nsp_);
586
587 ZMatrix interm(nshell0_, ocoeff_ti->mdim()*nmult_k, true);
588 vector<mutex> mmutex(shell0_.size());
589
590 for (auto& v : sp_) {
591 if (v->schwarz() < schwarz_thresh_) continue;
592
593 tasks.emplace_back(
594 [this, v, ocoeff_ti, &mmutex, nmult_k, &interm]() {
595 MultipoleBatch olm_su(v->shells(), centre_, lmax_k_);
596 olm_su.compute();
597 const int dimb0 = v->shell(0)->nbasis();
598 const int dimb1 = v->shell(1)->nbasis();
599 const ZMatrix zti(*ocoeff_ti->cut(v->offset(1), v->offset(1)+dimb1), 1.0);
600 unique_ptr<complex<double>[]> tmp(new complex<double>[dimb0*ocoeff_ti->mdim()*nmult_k]);
601 for (int k = 0; k != nmult_k; ++k)
602 zgemm_("N", "N", dimb0, ocoeff_ti->mdim(), dimb1, 1.0, olm_su.data() + olm_su.size_block()*k, dimb0, zti.data(), dimb1,
603 0.0, tmp.get()+dimb0*ocoeff_ti->mdim()*k, dimb0);
604 lock_guard<mutex> lock(mmutex[get<2>(shell0_.at(v->shell(0)))]);
605 const int off = get<0>(shell0_.at(v->shell(0)));
606 interm.add_block(1.0, off, 0, dimb0, ocoeff_ti->mdim()*nmult_k, tmp.get());
607 }
608 );
609 }
610 tasks.compute();
611
612 ZMatrix kjr(olm_ndim_, nshell0_);
613 zgemm_("N", "C", olm_ndim_, nshell0_, olm_mdim_*nmult_k, 1.0, mlm_ji_->data(), olm_ndim_, interm.data(), nshell0_, 0.0, kjr.data(), olm_ndim_);
614
615 auto krj = kjr.get_real_part()->transpose();
616
617 for (auto& i : shell0_) {
618 const int dimb0 = i.first->nbasis();
619 const int local_offset = get<0>(i.second);
620 const int all_offset = get<1>(i.second);
621 for (int i = 0; i != olm_ndim_; ++i)
622 blas::ax_plus_y_n(1.0, krj->element_ptr(local_offset, i), dimb0, out->element_ptr(all_offset, i));
623 }
624
625 return out;
626 }
627
628
compute_exact_ff(shared_ptr<const Matrix> density) const629 shared_ptr<const Matrix> Box::compute_exact_ff(shared_ptr<const Matrix> density) const { //for debug
630
631 auto jff = make_shared<Matrix>(density->ndim(), density->mdim());
632 jff->zero();
633
634 int ntask = 0;
635 for (auto& v01 : sp_) {
636 if (v01->schwarz() < schwarz_thresh_) continue;
637 for (int n = 0; n != nonneigh_.size(); ++n) {
638 shared_ptr<const Box> nonneigh = neigh_[n].lock();
639 for (auto& v23 : nonneigh->sp()) {
640 if (v23->schwarz() < schwarz_thresh_) continue;
641 ++ntask;
642 }
643 }
644 }
645
646 TaskQueue<function<void(void)>> tasks(ntask);
647 mutex jmutex;
648
649 for (auto& v01 : sp_) {
650 if (v01->schwarz() < schwarz_thresh_) continue;
651 shared_ptr<const Shell> b0 = v01->shell(1);
652 const int b0offset = v01->offset(1);
653 const int b0size = b0->nbasis();
654
655 shared_ptr<const Shell> b1 = v01->shell(0);
656 const int b1offset = v01->offset(0);
657 const int b1size = b1->nbasis();
658
659 for (int n = 0; n != nonneigh_.size(); ++n) {
660 shared_ptr<const Box> nonneigh = neigh_[n].lock();
661 for (auto& v23 : nonneigh->sp()) {
662 if (v23->schwarz() < schwarz_thresh_) continue;
663 shared_ptr<const Shell> b2 = v23->shell(1);
664 const int b2offset = v23->offset(1);
665 const int b2size = b2->nbasis();
666
667 shared_ptr<const Shell> b3 = v23->shell(0);
668 const int b3offset = v23->offset(0);
669 const int b3size = b3->nbasis();
670
671 array<shared_ptr<const Shell>,4> input = {{b3, b2, b1, b0}};
672
673 tasks.emplace_back(
674 [this, &jff, &density, input, b0offset, b0size, b1offset, b1size, b2offset, b2size, b3offset, b3size, &jmutex]() {
675 const double* density_data = density->data();
676
677 ERIBatch eribatch(input, 0.0);
678 eribatch.compute();
679 const double* eridata = eribatch.data();
680
681 lock_guard<mutex> lock(jmutex);
682 for (int j0 = b0offset; j0 != b0offset + b0size; ++j0) {
683 for (int j1 = b1offset; j1 != b1offset + b1size; ++j1) {
684 const int j1n = j1 * density->ndim();
685 for (int j2 = b2offset; j2 != b2offset + b2size; ++j2) {
686 const int j2n = j2 * density->ndim();
687 for (int j3 = b3offset; j3 != b3offset + b3size; ++j3, ++eridata) {
688 const double eri = *eridata;
689 jff->element(j1, j0) += density_data[j2n + j3] * eri;
690 jff->element(j3, j0) -= density_data[j1n + j2] * eri * 0.5;
691 }
692 }
693 }
694 }
695
696 }
697 );
698
699 }
700 }
701 }
702 tasks.compute();
703
704 return jff;
705 }
706
707
708 // M2M for X
shift_multipolesX(const int lmax,shared_ptr<const ZMatrix> oa,array<double,3> rab) const709 shared_ptr<const ZMatrix> Box::shift_multipolesX(const int lmax, shared_ptr<const ZMatrix> oa, array<double, 3> rab) const {
710
711 const double r = sqrt(rab[0]*rab[0] + rab[1]*rab[1] + rab[2]*rab[2]);
712 const double ctheta = (r > numerical_zero__) ? rab[2]/r : 0.0;
713 const double phi = atan2(rab[1], rab[0]);
714 const int nmult = (lmax+1)*(lmax+1);
715 const int olm_size_block = oa->ndim();
716
717 shared_ptr<ZMatrix> ob = oa->clone();
718 unique_ptr<double[]> plm0(new double[nmult]);
719 for (int l = 0; l != lmax+1; ++l)
720 for (int m = 0; m <= 2 * l; ++m) {
721 const int b = m-l;
722 const double sign = (b >= 0) ? 1.0 : (1-((b&1)<<1));
723 plm0[l*l+m] = sign * plm.compute(l, abs(b), ctheta);
724 }
725
726 unique_ptr<double[]> invfac(new double[2*lmax+1]);
727 fill_n(invfac.get(), 2*lmax+1, 1.0);
728 for (int i = 1; i <= 2*lmax; ++i)
729 for (int j = i; j <= 2*lmax; ++j)
730 invfac[j] /= i;
731
732 ZMatrix lmjk(nmult, nmult);
733 for (int l = 0; l <= lmax; ++l) {
734 for (int j = 0; j <= lmax; ++j) {
735 const int a = l - j;
736 const double rr = pow(r, a);
737 if (a < 0) continue;
738 for (int m = 0; m <= 2 * l; ++m) {
739 const int lo = max(-a, m-l-j);
740 const int hi = min( a, m-l+j);
741 for (int b = lo; b <= hi; ++b) {
742 const int k = m - l - b + j;
743 const double prefactor = rr * plm0[a*a+a+b] * invfac[a+abs(b)];
744 const complex<double> Oab = polar(prefactor, -b*phi);
745 lmjk.element(l*l+m, j*j+k) = Oab;
746 }
747 }
748 }
749 }
750 zgemm_("N", "T", olm_size_block, nmult, nmult, 1.0, oa->data(), olm_size_block, lmjk.data(), nmult, 0.0, ob->data(), olm_size_block);
751
752 return ob;
753 }
754
755
756 // L2L for X
shift_localLX(const int lmax,const shared_ptr<const ZMatrix> mr,array<double,3> rb) const757 shared_ptr<const ZMatrix> Box::shift_localLX(const int lmax, const shared_ptr<const ZMatrix> mr, array<double, 3> rb) const {
758
759 const double r = sqrt(rb[0]*rb[0] + rb[1]*rb[1] + rb[2]*rb[2]);
760 const double ctheta = (r > numerical_zero__) ? rb[2]/r : 0.0;
761 const double phi = atan2(rb[1], rb[0]);
762 const int nmult = (lmax+1)*(lmax+1);
763 const int olm_size_block = mr->ndim();
764
765 shared_ptr<ZMatrix> mrb = mr->clone();
766 unique_ptr<double[]> plm0(new double[nmult]);
767 for (int l = 0; l != lmax+1; ++l)
768 for (int m = 0; m <= 2 * l; ++m) {
769 const int b = m-l;
770 const double sign = (b >= 0) ? 1.0 : (1-((b&1)<<1));
771 plm0[l*l+m] = sign * plm.compute(l, abs(b), ctheta);
772 }
773
774 unique_ptr<double[]> invfac(new double[2*lmax+1]);
775 fill_n(invfac.get(), 2*lmax+1, 1.0);
776 for (int i = 1; i <= 2*lmax; ++i)
777 for (int j = i; j <= 2*lmax; ++j)
778 invfac[j] /= i;
779
780 ZMatrix lmjk(nmult, nmult);
781 for (int l = 0; l <= lmax; ++l) {
782 for (int j = 0; j <= lmax; ++j) {
783 const int a = j - l;
784 const double rr = pow(r, a);
785 if (a < 0) continue;
786 for (int m = 0; m <= 2 * l; ++m) {
787 const int lo = max(-a, l-m-j);
788 const int hi = min( a, l-m+j);
789 for (int b = lo; b <= hi; ++b) {
790 const int k = m - l + b + j;
791 const double prefactor = rr * plm0[a*a+a+b] * invfac[a+abs(b)];
792 const complex<double> Oab = polar(prefactor, -b*phi);
793 lmjk.element(l*l+m, j*j+k) = Oab;
794 }
795 }
796 }
797 }
798
799 zgemm_("N", "T", olm_size_block, nmult, nmult, 1.0, mr->data(), olm_size_block, lmjk.data(), nmult, 0.0, mrb->data(), olm_size_block);
800 return mrb;
801 }
802
803
804 // M2L for X
shift_localMX(const int lmax,shared_ptr<const ZMatrix> olm,array<double,3> r12) const805 shared_ptr<const ZMatrix> Box::shift_localMX(const int lmax, shared_ptr<const ZMatrix> olm, array<double, 3> r12) const {
806
807 const double r = sqrt(r12[0]*r12[0] + r12[1]*r12[1] + r12[2]*r12[2]);
808 const double ctheta = (r > numerical_zero__) ? r12[2]/r : 0.0;
809 const double phi = atan2(r12[1], r12[0]);
810 const int nmult = (lmax+1)*(lmax+1);
811 const int olm_size_block = olm->ndim();
812
813 shared_ptr<ZMatrix> mb = olm->clone();
814 unique_ptr<double[]> plm0(new double[(2*lmax+1)*(2*lmax+1)]);
815 for (int l = 0; l != 2*lmax+1; ++l)
816 for (int m = 0; m <= 2 * l; ++m) {
817 const int b = m-l;
818 const double sign = (b >= 0) ? 1.0 : (1-((b&1)<<1));
819 plm0[l*l+m] = sign * plm.compute(l, abs(b), ctheta);
820 }
821
822 ZMatrix lmjk(nmult, nmult);
823 for (int l = 0; l <= lmax; ++l) {
824 const double phase_l = (1-((l&1)<<1));
825 for (int j = 0; j <= lmax; ++j) {
826 const int a = l + j;
827 const double rr = 1.0 / pow(r, a + 1);
828 for (int m = 0; m <= 2 * l; ++m) {
829 for (int k = 0; k <= 2 * j; ++k) {
830 const int b = m - l + k - j;
831 double prefactor = plm0[a*a+a+b] * rr * f(a-abs(b));
832 const complex<double> Mab = phase_l * polar(prefactor, b*phi);
833 lmjk.element(l*l+m, j*j+k) = Mab;
834 }
835 }
836 }
837 }
838
839 zgemm_("N", "T", olm_size_block, nmult, nmult, 1.0, olm->data(), olm_size_block, lmjk.data(), nmult, 0.0, mb->data(), olm_size_block);
840 return mb;
841 }
842
843
compute_Fock_nf_J(shared_ptr<const Matrix> density,shared_ptr<const VectorB> max_den) const844 shared_ptr<const Matrix> Box::compute_Fock_nf_J(shared_ptr<const Matrix> density, shared_ptr<const VectorB> max_den) const {
845
846 assert(nchild_ == 0);
847 auto out = make_shared<Matrix>(density->ndim(), density->mdim());
848 out->zero();
849
850 // NF: 4c integrals
851 const int shift = sizeof(int) * 4;
852 const int nsh = sqrt(max_den->size());
853 assert (nsh*nsh == max_den->size());
854
855 int ntask = 0;
856 for (auto& v01 : sp_) {
857 if (v01->schwarz() < schwarz_thresh_) continue;
858 const int i0 = v01->shell_ind(1);
859
860 const int i1 = v01->shell_ind(0);
861 if (i1 < i0) continue;
862
863 const int i01 = i0 * nsh + i1;
864 const double density_01 = (*max_den)(i01) * 4.0;
865
866 for (int n = 0; n != nneigh_; ++n) {
867 shared_ptr<const Box> neigh = neigh_[n].lock();
868 for (auto& v23 : neigh->sp()) {
869 if (v23->schwarz() < schwarz_thresh_) continue;
870 const int i2 = v23->shell_ind(1);
871 if (i2 < i0) continue;
872 const int i3 = v23->shell_ind(0);
873 if (i3 < i2) continue;
874 const int i23 = i2 * nsh + i3;
875 if (i23 < i01) continue;
876
877 const double density_23 = (*max_den)(i23) * 4.0;
878
879 const double mulfactor = max(density_01, density_23);
880 const double integral_bound = mulfactor * v01->schwarz() * v23->schwarz();
881 const bool skip_schwarz = integral_bound < schwarz_thresh_;
882 if (skip_schwarz) continue;
883 ++ntask;
884 }
885 }
886 }
887
888 TaskQueue<function<void(void)>> tasks(ntask);
889 mutex jmutex;
890
891 for (auto& v01 : sp_) {
892 if (v01->schwarz() < schwarz_thresh_) continue;
893 shared_ptr<const Shell> b0 = v01->shell(1);
894 const int i0 = v01->shell_ind(1);
895 const int b0offset = v01->offset(1);
896 const int b0size = b0->nbasis();
897
898 shared_ptr<const Shell> b1 = v01->shell(0);
899 const int i1 = v01->shell_ind(0);
900 if (i1 < i0) continue;
901 const int b1offset = v01->offset(0);
902 const int b1size = b1->nbasis();
903
904 const int i01 = i0 * nsh + i1;
905 const double density_01 = (*max_den)(i01) * 4.0;
906
907 for (int n = 0; n != nneigh_; ++n) {
908 shared_ptr<const Box> neigh = neigh_[n].lock();
909 for (auto& v23 : neigh->sp()) {
910 if (v23->schwarz() < schwarz_thresh_) continue;
911 shared_ptr<const Shell> b2 = v23->shell(1);
912 const int i2 = v23->shell_ind(1);
913 if (i2 < i0) continue;
914 const int b2offset = v23->offset(1);
915 const int b2size = b2->nbasis();
916
917 shared_ptr<const Shell> b3 = v23->shell(0);
918 const int i3 = v23->shell_ind(0);
919 if (i3 < i2) continue;
920 const int b3offset = v23->offset(0);
921 const int b3size = b3->nbasis();
922
923 const int i23 = i2 * nsh + i3;
924 if (i23 < i01) continue;
925
926 const double density_23 = (*max_den)(i23) * 4.0;
927
928 const double mulfactor = max(density_01, density_23);
929 const double integral_bound = mulfactor * v01->schwarz() * v23->schwarz();
930 const bool skip_schwarz = integral_bound < schwarz_thresh_;
931 if (skip_schwarz) continue;
932
933 array<shared_ptr<const Shell>,4> input = {{b3, b2, b1, b0}};
934
935 tasks.emplace_back(
936 [this, &out, &density, input, b0offset, i01, i23, b0size, b1offset, b1size, b2offset, b2size, b3offset, b3size, mulfactor, &jmutex]() {
937
938 ERIBatch eribatch(input, mulfactor);
939 eribatch.compute();
940 const double* eridata = eribatch.data();
941
942 const double* density_data = density->data();
943 lock_guard<mutex> lock(jmutex);
944 for (int j0 = b0offset; j0 != b0offset + b0size; ++j0) {
945 const int j0n = j0 * density->ndim();
946 for (int j1 = b1offset; j1 != b1offset + b1size; ++j1) {
947 if (j1 < j0) {
948 eridata += b2size * b3size;
949 continue;
950 }
951 const unsigned int nj01 = (j0 << shift) + j1;
952 const double scale01 = (j0 == j1) ? 0.5 : 1.0;
953 for (int j2 = b2offset; j2 != b2offset + b2size; ++j2) {
954 const int j2n = j2 * density->ndim();
955 for (int j3 = b3offset; j3 != b3offset + b3size; ++j3, ++eridata) {
956 if (j3 < j2) continue;
957 const unsigned int nj23 = (j2 << shift) + j3;
958 if (nj23 < nj01 && i01 == i23) continue;
959 const double scale23 = (j2 == j3) ? 0.5 : 1.0;
960 const double scale = (nj01 == nj23) ? 0.25 : 0.5;
961
962 const double eri = *eridata;
963 const double intval = eri * scale * scale01 * scale23;
964 const double intval4 = 4.0 * intval;
965 out->element(j1, j0) += density_data[j2n + j3] * intval4;
966 out->element(j3, j2) += density_data[j0n + j1] * intval4;
967 }
968 }
969 }
970 }
971
972 }
973 );
974
975 }
976 }
977 }
978
979 tasks.compute();
980
981 return out;
982 }
983
984
compute_Fock_nf_K(shared_ptr<const Matrix> density,shared_ptr<const VectorB> max_den) const985 shared_ptr<const Matrix> Box::compute_Fock_nf_K(shared_ptr<const Matrix> density, shared_ptr<const VectorB> max_den) const {
986
987 assert(nchild_ == 0);
988 auto out = make_shared<Matrix>(density->ndim(), density->mdim());
989 out->zero();
990
991 // NF: 4c integrals
992 const int shift = sizeof(int) * 4;
993 const int nsh = sqrt(max_den->size());
994 assert (nsh*nsh == max_den->size());
995
996 int ntask = 0;
997 for (auto& v01 : sp_) {
998 if (v01->schwarz() < schwarz_thresh_) continue;
999 const int i0 = v01->shell_ind(1);
1000
1001 const int i1 = v01->shell_ind(0);
1002 if (i1 < i0) continue;
1003
1004 const int i01 = i0 * nsh + i1;
1005 for (int n = 0; n != nneigh_; ++n) {
1006 shared_ptr<const Box> neigh = neigh_[n].lock();
1007 for (auto& v23 : neigh->sp()) {
1008 if (v23->schwarz() < schwarz_thresh_) continue;
1009 const int i2 = v23->shell_ind(1);
1010 if (i2 < i0) continue;
1011 const int i3 = v23->shell_ind(0);
1012 if (i3 < i2) continue;
1013 const int i23 = i2 * nsh + i3;
1014 if (i23 < i01) continue;
1015
1016 const double density_02 = (*max_den)(i0 * nsh + i2);
1017 const double density_03 = (*max_den)(i0 * nsh + i3);
1018 const double density_12 = (*max_den)(i1 * nsh + i2);
1019 const double density_13 = (*max_den)(i1 * nsh + i3);
1020
1021 const double mulfactor = max(max(density_13, density_02),
1022 max(density_03, density_12));
1023 const double integral_bound = mulfactor * v01->schwarz() * v23->schwarz();
1024 const bool skip_schwarz = integral_bound < schwarz_thresh_;
1025 if (skip_schwarz) continue;
1026 ++ntask;
1027 }
1028 }
1029 }
1030
1031 TaskQueue<function<void(void)>> tasks(ntask);
1032 mutex jmutex;
1033
1034 for (auto& v01 : sp_) {
1035 if (v01->schwarz() < schwarz_thresh_) continue;
1036 shared_ptr<const Shell> b0 = v01->shell(1);
1037 const int i0 = v01->shell_ind(1);
1038 const int b0offset = v01->offset(1);
1039 const int b0size = b0->nbasis();
1040
1041 shared_ptr<const Shell> b1 = v01->shell(0);
1042 const int i1 = v01->shell_ind(0);
1043 if (i1 < i0) continue;
1044 const int b1offset = v01->offset(0);
1045 const int b1size = b1->nbasis();
1046
1047 const int i01 = i0 * nsh + i1;
1048 for (int n = 0; n != nneigh_; ++n) {
1049 shared_ptr<const Box> neigh = neigh_[n].lock();
1050 for (auto& v23 : neigh->sp()) {
1051 if (v23->schwarz() < schwarz_thresh_) continue;
1052 shared_ptr<const Shell> b2 = v23->shell(1);
1053 const int i2 = v23->shell_ind(1);
1054 if (i2 < i0) continue;
1055 const int b2offset = v23->offset(1);
1056 const int b2size = b2->nbasis();
1057
1058 shared_ptr<const Shell> b3 = v23->shell(0);
1059 const int i3 = v23->shell_ind(0);
1060 if (i3 < i2) continue;
1061 const int b3offset = v23->offset(0);
1062 const int b3size = b3->nbasis();
1063
1064 const int i23 = i2 * nsh + i3;
1065 if (i23 < i01) continue;
1066
1067 const double density_02 = (*max_den)(i0 * nsh + i2);
1068 const double density_03 = (*max_den)(i0 * nsh + i3);
1069 const double density_12 = (*max_den)(i1 * nsh + i2);
1070 const double density_13 = (*max_den)(i1 * nsh + i3);
1071
1072 const double mulfactor = max(max(density_13, density_02),
1073 max(density_03, density_12));
1074 const double integral_bound = mulfactor * v01->schwarz() * v23->schwarz();
1075 const bool skip_schwarz = integral_bound < schwarz_thresh_;
1076 if (skip_schwarz) continue;
1077
1078 array<shared_ptr<const Shell>,4> input = {{b3, b2, b1, b0}};
1079
1080 tasks.emplace_back(
1081 [this, &out, &density, input, b0offset, i01, i23, b0size, b1offset, b1size, b2offset, b2size, b3offset, b3size, mulfactor, &jmutex]() {
1082
1083 ERIBatch eribatch(input, mulfactor);
1084 eribatch.compute();
1085 const double* eridata = eribatch.data();
1086
1087 const double* density_data = density->data();
1088 lock_guard<mutex> lock(jmutex);
1089 for (int j0 = b0offset; j0 != b0offset + b0size; ++j0) {
1090 const int j0n = j0 * density->ndim();
1091 for (int j1 = b1offset; j1 != b1offset + b1size; ++j1) {
1092 if (j1 < j0) {
1093 eridata += b2size * b3size;
1094 continue;
1095 }
1096 const int j1n = j1 * density->ndim();
1097 const unsigned int nj01 = (j0 << shift) + j1;
1098 const double scale01 = (j0 == j1) ? 0.5 : 1.0;
1099 for (int j2 = b2offset; j2 != b2offset + b2size; ++j2) {
1100 for (int j3 = b3offset; j3 != b3offset + b3size; ++j3, ++eridata) {
1101 if (j3 < j2) continue;
1102 const unsigned int nj23 = (j2 << shift) + j3;
1103 if (nj23 < nj01 && i01 == i23) continue;
1104 const double scale23 = (j2 == j3) ? 0.5 : 1.0;
1105 const double scale = (nj01 == nj23) ? 0.25 : 0.5;
1106
1107 const double eri = *eridata;
1108 const double intval = eri * scale * scale01 * scale23;
1109 out->element(max(j2, j0), min(j2,j0)) -= density_data[j1n + j3] * intval;
1110 out->element(j3, j0) -= density_data[j1n + j2] * intval;
1111 out->element(max(j1,j2), min(j1,j2)) -= density_data[j0n + j3] * intval;
1112 out->element(max(j1,j3), min(j1,j3)) -= density_data[j0n + j2] * intval;
1113 }
1114 }
1115 }
1116 }
1117
1118 }
1119 );
1120
1121 }
1122 }
1123 }
1124
1125 tasks.compute();
1126
1127 return out;
1128 }
1129