1 //
2 // BAGEL - Parallel electron correlation program.
3 // Filename: fmm.cc
4 // Copyright (C) 2016 Toru Shiozaki
5 //
6 // Author: Hai-Anh Le <anh@u.northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // The BAGEL package is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 3, or (at your option)
14 // any later version.
15 //
16 // The BAGEL package is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the BAGEL package; see COPYING.  If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25 
26 
27 #include <src/scf/fmm/fmm.h>
28 #include <src/util/taskqueue.h>
29 #include <src/util/parallel/mpi_interface.h>
30 
31 using namespace bagel;
32 using namespace std;
33 
34 
FMM(std::shared_ptr<const PTree> idata,shared_ptr<const Geometry> geom,const bool kbuild)35 FMM::FMM(std::shared_ptr<const PTree> idata, shared_ptr<const Geometry> geom, const bool kbuild) {
36 
37   const bool dodf = idata->get<bool>("df", true);
38   if (dodf) throw runtime_error("FMM only works without DF now");
39   assert(geom->fmm());
40 
41   lmax_ = idata->get<int>("lmax", 10);
42   const int batchsize = idata->get<int>("batch_size", -1);
43   if (batchsize < 0)
44     xbatchsize_ = static_cast<int>(ceil(0.5*geom->nele()/mpi__->size()));
45   debug_ = idata->get<bool>("debug", false);
46 
47   if (kbuild) {
48     auto newgeom = make_shared<const Geometry>(*geom, idata->get<string>("extent_exchange", "yang"));
49     geomdata_ = newgeom->fmm();
50     ns_ = idata->get<int>("ns_exchange", 2);
51     ws_ = idata->get<double>("ws_exchange", 0.0);
52     do_exchange_ = true;
53   } else {
54     geomdata_ = geom->fmm();
55     ns_ = idata->get<int>("ns", 2);
56     ws_ = idata->get<double>("ws", 0.0);
57     do_exchange_ = idata->get<bool>("exchange", true);
58     lmax_k_ = idata->get<int>("lmax_exchange", 2);
59   }
60 
61   centre_ = geom->charge_center();
62   nbasis_ = geom->nbasis();
63   thresh_ = geom->schwarz_thresh();
64   init();
65 }
66 
67 
init()68 void FMM::init() {
69 
70   const int ns2 = pow(2, ns_);
71 
72   const int nsp = geomdata_->nshellpair();
73   double rad = 0;
74   int isp = 0;
75   isp_.reserve(nsp);
76   for (int i = 0; i != nsp; ++i) {
77     if (geomdata_->shellpair(i)->schwarz() < thresh_) continue;
78     isp_.push_back(i);
79     ++isp;
80     for (int j = 0; j != 3; ++j) {
81       const double jco = geomdata_->shellpair(i)->centre(j) - centre_[j];
82       rad = max(rad, abs(jco));
83     }
84   }
85   nsp_ = isp;
86   assert(isp_.size() == nsp_);
87   coordinates_.resize(nsp_);
88   for (int i = 0; i != nsp_; ++i)
89     coordinates_[i] = geomdata_->shellpair(isp_[i])->centre();
90 
91   boxsize_  = 2.05 * rad;
92   unitsize_ = boxsize_/ns2;
93   coordinates_.resize(nsp_);
94 
95   get_boxes();
96 
97   do_ff_ = false;
98   for (int i = 0; i != nbranch_[0]; ++i)
99     if (box_[i]->ninter_ != 0) do_ff_ = true;
100 }
101 
102 
get_boxes()103 void FMM::get_boxes() {
104 
105   Timer fmminit;
106 
107   const int ns2 = pow(2, ns_);
108 
109   // find out unempty leaves
110   vector<array<int, 3>> boxid; // max dimension 2**(ns+1)-1
111   boxid.reserve(nsp_);
112   unique_ptr<int[]> ibox(new int[nsp_]);
113 
114   map<array<int, 3>, int> treemap;
115   assert(treemap.empty());
116   int nleaf = 0;
117   for (int isp = 0; isp != nsp_; ++isp) {
118     array<int, 3> idxbox;
119     for (int i = 0; i != 3; ++i) {
120       const double coi = coordinates_[isp][i]-centre_[i];
121       idxbox[i] = static_cast<int>(floor(coi/unitsize_)) + ns2/2 + 1;
122       assert(idxbox[i] <= ns2 && idxbox[i] > 0);
123     }
124 
125     map<array<int, 3>,int>::iterator box = treemap.find(idxbox);
126     const bool box_found = (box != treemap.end());
127     if (box_found) {
128       ibox[isp] = treemap.find(idxbox)->second;
129     } else {
130       treemap.insert(treemap.end(), pair<array<int, 3>,int>(idxbox, nleaf));
131       ibox[isp] = nleaf;
132       boxid.resize(nleaf+1);
133       boxid[nleaf] = idxbox;
134       ++nleaf;
135     }
136   }
137   assert(nleaf == boxid.size() && nleaf <= nsp_);
138 
139   // get leaves
140   nleaf_ = nleaf;
141   vector<vector<int>> leaves(nleaf);
142   for (int isp = 0; isp != nsp_; ++isp) {
143     const int n = ibox[isp];
144     assert(n < nleaf);
145     const int ingeom = isp_[isp];
146     leaves[n].insert(leaves[n].end(), ingeom);
147   }
148 
149   // get all unempty boxes
150   int nbox = 0;
151   for (int il = 0; il != nleaf; ++il) {
152     vector<shared_ptr<const ShellPair>> sp(leaves[il].size());
153     int cnt = 0;
154     for (auto& isp : leaves[il])
155       sp[cnt++] = geomdata_->shellpair(isp);
156     array<int, 3> id = boxid[il];
157     array<double, 3> centre;
158     for (int i = 0; i != 3; ++i)
159       centre[i] = (id[i]-ns2/2-1)*unitsize_ + 0.5*unitsize_;
160     auto newbox = make_shared<Box>(0, unitsize_, centre, il, id, lmax_, lmax_k_, sp, thresh_);
161     box_.insert(box_.end(), newbox);
162     ++nbox;
163   }
164 
165   int icntc = 0;
166   int icntp = ns2;
167   nbranch_.resize(ns_+1);
168 
169   for (int nss = ns_; nss > -1; --nss) {
170     int nbranch = 0;
171     const int nss2 = pow(2, nss);
172     //const int offset = pow(2, nss-2);
173 
174     for (int i = 1; i != nss2+1; ++i) {
175       for (int j = 1; j != nss2+1; ++j) {
176         for (int k = 1; k != nss2+1; ++k) {
177           vector<shared_ptr<const ShellPair>> sp;
178           array<int, 3> idxp;
179           idxp[0] = static_cast<int>(floor(0.5*(i+1))) + icntp;
180           idxp[1] = static_cast<int>(floor(0.5*(j+1))) + icntp;
181           idxp[2] = static_cast<int>(floor(0.5*(k+1))) + icntp;
182 
183           array<int, 3> idxc = {{i+icntc, j+icntc, k+icntc}};
184           map<array<int, 3>,int>::iterator child = treemap.find(idxc);
185           const bool child_found = (child != treemap.end());
186 
187           if (child_found) {
188             const int ichild = treemap.find(idxc)->second;
189             map<array<int, 3>,int>::iterator parent = treemap.find(idxp);
190             const bool parent_found = (parent != treemap.end());
191 
192             if (!parent_found) {
193               if (nss != 0) {
194                 const double boxsize = unitsize_ * pow(2, ns_-nss+1);
195                 auto newbox = make_shared<Box>(ns_-nss+1, boxsize, array<double, 3>{{0.0, 0.0, 0.0}},
196                      nbox, idxp, lmax_, lmax_k_, box_[ichild]->sp(), thresh_);
197                 box_.insert(box_.end(), newbox);
198                 treemap.insert(treemap.end(), pair<array<int, 3>,int>(idxp, nbox));
199                 box_[nbox]->insert_child(box_[ichild]);
200                 box_[ichild]->insert_parent(box_[nbox]);
201                 ++nbox;
202               }
203               ++nbranch;
204             } else {
205               const int ibox = treemap.find(idxp)->second;
206               box_[ibox]->insert_child(box_[ichild]);
207               box_[ibox]->insert_sp(box_[ichild]->sp());
208               box_[ichild]->insert_parent(box_[ibox]);
209               ++nbranch;
210             }
211           }
212 
213         }
214       }
215     }
216     icntc = icntp;
217     icntp += nss2;
218     nbranch_[ns_-nss] = nbranch;
219   }
220   assert(accumulate(nbranch_.begin(), nbranch_.end(), 0) == nbox);
221   nbox_ = nbox;
222 
223   for (auto& b : box_)
224     b->init();
225 
226   int icnt = 0;
227   for (int ir = ns_; ir > -1; --ir) {
228     vector<weak_ptr<Box>> tmpbox(nbranch_[ir]);
229     for (int ib = 0; ib != nbranch_[ir]; ++ib)
230       tmpbox[ib] = box_[nbox_-icnt-nbranch_[ir]+ib];
231     for (auto& b : tmpbox) {
232       shared_ptr<Box> b0 = b.lock();
233       b0->get_neigh(tmpbox, ws_);
234       b0->get_inter(tmpbox, ws_);
235       b0->sort_sp();
236     }
237     icnt += nbranch_[ir];
238   }
239 
240   if (debug_) {
241     cout << "Centre of Charge: " << setprecision(3) << centre_[0] << "  " << centre_[1] << "  " << centre_[2] << endl;
242     cout << "ns_ = " << ns_ << " nbox = " << nbox_ << "  nleaf = " << nleaf << " nsp = " << nsp_ << " ws = " << ws_ << " lmaxJ " << lmax_;
243     if (do_exchange_)
244       cout << " *** BATCHSIZE " << xbatchsize_ << " lmax_k " << lmax_k_;
245     cout << " boxsize = " << boxsize_ << " leafsize = " << unitsize_ << endl;
246     int i = 0;
247     for (auto& b : box_) {
248       const bool ipar = (b->parent_.lock()) ? true : false;
249       int nsp_neigh = 0;
250       for (int j = 0; j != b->neigh_.size(); ++j) {
251         shared_ptr<const Box> ne = b->neigh_[j].lock();
252         nsp_neigh += ne->nsp_;
253       }
254       int nsp_inter = 0;
255       for (int j = 0; j != b->inter_.size(); ++j) {
256         shared_ptr<const Box> inter = b->inter_[j].lock();
257         nsp_inter += inter->nsp_;
258       }
259       cout << i << " rank = " << b->rank() << "  boxsize = " << b->boxsize() << " extent = " << b->extent() << " nsp = " << b->nsp_
260            << " nchild = " << b->nchild_ << " nneigh = " << b->nneigh_ << " nsp " << nsp_neigh
261            << " ninter = " << b->ninter_ << " nsp " << nsp_inter
262            << " centre = " << b->centre(0) << " " << b->centre(1) << " " << b->centre(2)
263            << " idxc = " << b->tvec()[0] << " " << b->tvec()[1] << " " << b->tvec()[2] << " *** " << ipar << endl;
264       ++i;
265     }
266   }
267 
268   fmminit.tick_print("FMM initialisation");
269 }
270 
271 
M2M(shared_ptr<const Matrix> density,const bool dox) const272 void FMM::M2M(shared_ptr<const Matrix> density, const bool dox) const {
273 
274   Timer m2mtime;
275   for (int i = 0; i != nbranch_[0]; ++i)
276     if (i % mpi__->size() == mpi__->rank())
277       box_[i]->compute_M2M(density);
278 
279   for (int i = 0; i != nbranch_[0]; ++i)
280     mpi__->broadcast(box_[i]->olm()->data(), box_[i]->olm()->size(), i % mpi__->size());
281 
282   m2mtime.tick_print("Compute multipoles");
283 
284   int icnt = nbranch_[0];
285   for (int i = 1; i != ns_+1; ++i) {
286     for (int j = 0; j != nbranch_[i]; ++j, ++icnt)
287       box_[icnt]->compute_M2M(density);
288   }
289   m2mtime.tick_print("M2M pass");
290   assert(icnt == nbox_);
291 }
292 
293 
M2M_X(shared_ptr<const Matrix> ocoeff_sj,shared_ptr<const Matrix> ocoeff_ui) const294 void FMM::M2M_X(shared_ptr<const Matrix> ocoeff_sj, shared_ptr<const Matrix> ocoeff_ui) const {
295 
296   Timer m2mtime;
297 
298   int icnt = 0;
299   for (int i = 0; i != ns_+1; ++i) {
300     for (int j = 0; j != nbranch_[i]; ++j, ++icnt) {
301       box_[icnt]->compute_M2M_X(ocoeff_sj, ocoeff_ui);
302       if (icnt >= nbox_) throw logic_error("Trying to access beyond nbox in M2M_X");
303     }
304   }
305   m2mtime.tick_print("M2M-X pass");
306   assert(icnt == nbox_);
307 }
308 
309 
M2L(const bool dox) const310 void FMM::M2L(const bool dox) const {
311 
312   Timer m2ltime;
313 
314   TaskQueue<function<void(void)>> tasks(nbox_);
315   if (!dox) {
316     for (auto& b : box_)
317       tasks.emplace_back(
318         [this, b]() { b->compute_M2L(); }
319       );
320     tasks.compute();
321     m2ltime.tick_print("M2L pass");
322   } else {
323     for (auto& b : box_)
324       tasks.emplace_back(
325         [this, b]() { b->compute_M2L_X(); }
326       );
327     tasks.compute();
328     m2ltime.tick_print("M2L-X pass");
329   }
330 }
331 
332 
L2L(const bool dox) const333 void FMM::L2L(const bool dox) const {
334 
335   Timer l2ltime;
336 
337   int icnt = 0;
338   if (!dox) {
339     for (int ir = ns_; ir > -1; --ir) {
340       for (int ib = 0; ib != nbranch_[ir]; ++ib)
341         box_[nbox_-icnt-nbranch_[ir]+ib]->compute_L2L();
342 
343       icnt += nbranch_[ir];
344     }
345 
346     l2ltime.tick_print("L2L pass");
347   } else {
348     for (int ir = ns_; ir > -1; --ir) {
349       for (int ib = 0; ib != nbranch_[ir]; ++ib) {
350         const int ibox = nbox_-icnt-nbranch_[ir]+ib;
351         box_[ibox]->compute_L2L_X();
352         if (icnt >= nbox_) throw logic_error("Trying to access beyond nbox in M2M_X");
353       }
354 
355       icnt += nbranch_[ir];
356     }
357     l2ltime.tick_print("L2L-X pass");
358   }
359 }
360 
361 
compute_Fock_FMM(shared_ptr<const Matrix> density) const362 shared_ptr<const Matrix> FMM::compute_Fock_FMM(shared_ptr<const Matrix> density) const {
363 
364   auto out = make_shared<Matrix>(nbasis_, nbasis_);
365   out->zero();
366 
367   Timer fmmtime;
368   M2M(density);
369   M2L();
370   L2L();
371 
372   Timer nftime;
373 
374   if (density) {
375     assert(nbasis_ == density->ndim());
376     auto maxden = make_shared<VectorB>(geomdata_->nshellpair());
377     const double* density_data = density->data();
378     for (int i01 = 0; i01 != geomdata_->nshellpair(); ++i01) {
379       shared_ptr<const Shell> sh0 = geomdata_->shellpair(i01)->shell(1);
380       const int offset0 = geomdata_->shellpair(i01)->offset(1);
381       const int size0 = sh0->nbasis();
382 
383       shared_ptr<const Shell> sh1 = geomdata_->shellpair(i01)->shell(0);
384       const int offset1 = geomdata_->shellpair(i01)->offset(0);
385       const int size1 = sh1->nbasis();
386 
387       double denmax = 0.0;
388       for (int i0 = offset0; i0 != offset0 + size0; ++i0) {
389         const int i0n = i0 * density->ndim();
390         for (int i1 = offset1; i1 != offset1 + size1; ++i1)
391           denmax = max(denmax, fabs(density_data[i0n + i1]));
392       }
393       (*maxden)(i01) = denmax;
394     }
395 
396     auto ff = make_shared<Matrix>(nbasis_, nbasis_);
397     for (int i = 0; i != nbranch_[0]; ++i)
398       if (i % mpi__->size() == mpi__->rank()) {
399         auto ei = box_[i]->compute_Fock_nf(density, maxden);
400         blas::ax_plus_y_n(1.0, ei->data(), nbasis_*nbasis_, out->data());
401         auto ffi = box_[i]->compute_Fock_ff(density);
402         blas::ax_plus_y_n(1.0, ffi->data(), nbasis_*nbasis_, ff->data());
403       }
404     out->allreduce();
405     ff->allreduce();
406 
407     const double enj = 0.5*density->dot_product(*ff);
408     cout << "    o  Far-field Coulomb energy: " << setprecision(9) << enj << endl;
409 
410     for (int i = 0; i != nbasis_; ++i) out->element(i, i) *= 2.0;
411     out->fill_upper();
412     *out += *ff;
413   }
414 
415   nftime.tick_print("near-field");
416   fmmtime.tick_print("FMM-J");
417 
418   return out;
419 }
420 
421 
compute_K_ff(shared_ptr<const Matrix> ocoeff,shared_ptr<const Matrix> overlap) const422 shared_ptr<const Matrix> FMM::compute_K_ff(shared_ptr<const Matrix> ocoeff, shared_ptr<const Matrix> overlap) const {
423 
424   if (!do_exchange_ || !ocoeff)
425     return overlap->clone();
426 
427   const int nocc = ocoeff->mdim();
428   auto krj = make_shared<Matrix>(nbasis_, nocc);
429   const int nbatch = (nocc-1) / xbatchsize_+1;
430   StaticDist dist(nocc, nbatch);
431   vector<pair<size_t, size_t>> table = dist.atable();
432 
433   Timer ktime;
434   int u = 0;
435   for (auto& itable : table) {
436     if (u++ % mpi__->size() == mpi__->rank()) {
437       if (debug_)
438         cout << "BATCH " << u << "  from " << itable.first << " doing " << itable.second << " MPI " << u << endl;
439       auto ocoeff_ui = make_shared<const Matrix>(ocoeff->slice(itable.first, itable.first+itable.second));
440       shared_ptr<const Matrix> ocoeff_sj = ocoeff;
441 
442       {
443         M2M_X(ocoeff_sj, ocoeff_ui);
444         M2L(true);
445         L2L(true);
446       }
447       Timer assembletime;
448       for (int i = 0; i != nbranch_[0]; ++i) {
449         auto ffx = box_[i]->compute_Fock_ff_K(ocoeff_ui);
450         assert(ffx->ndim() == nbasis_ && ffx->mdim() == nocc);
451         blas::ax_plus_y_n(1.0, ffx->data(), nbasis_*nocc, krj->data());
452       }
453       assembletime.tick_print("Building Krj");
454     }
455   }
456   krj->allreduce();
457 
458   if (debug_) {
459     Timer projtime;
460     auto kij = make_shared<const Matrix>(*ocoeff % *krj);
461     // check kij is symmetric
462     auto kji = kij->transpose();
463     const double err = (*kij - *kji).rms();
464     if (err > 1e-15 && debug_)
465       cout << " *** Warning: Kij is not symmetric: rms(K-K^T) = " << setprecision(20) << err << endl;
466 
467     projtime.tick_print("Projecting K");
468   }
469 
470   ktime.tick_print("FMM-K");
471   const double enk = ocoeff->dot_product(*krj);
472   cout << "          o    Far-field Exchange energy: " << setprecision(9) << enk << endl;
473 
474   return krj;
475 }
476 
477 
print_boxes(const int i) const478 void FMM::print_boxes(const int i) const {
479 
480   int ib = 0;
481   for (auto& b : box_) {
482     if (b->rank() == i) {
483       cout << "Box " << ib << " rank = " << i << " *** size " << b->boxsize() << " *** nchild = " << b->nchild_ << " *** nsp = " << b->nsp_ << " *** Shell pairs at:" << endl;
484       for (int i = 0; i != b->nsp_; ++i)
485         cout << setprecision(5) << b->sp()[i]->centre(0) << "  " << b->sp()[i]->centre(1) << "  " << b->sp()[i]->centre(2) << endl;
486       ++ib;
487     }
488     if (b->rank() > i) break;
489   }
490 
491 }
492 
493 
compute_Fock_FMM_K(shared_ptr<const Matrix> density) const494 shared_ptr<const Matrix> FMM::compute_Fock_FMM_K(shared_ptr<const Matrix> density) const {
495 
496   auto out = make_shared<Matrix>(nbasis_, nbasis_);
497   out->zero();
498 
499   Timer nftime;
500   if (density) {
501     assert(nbasis_ == density->ndim());
502     auto maxden = make_shared<VectorB>(geomdata_->nshellpair());
503     const double* density_data = density->data();
504     for (int i01 = 0; i01 != geomdata_->nshellpair(); ++i01) {
505       shared_ptr<const Shell> sh0 = geomdata_->shellpair(i01)->shell(1);
506       const int offset0 = geomdata_->shellpair(i01)->offset(1);
507       const int size0 = sh0->nbasis();
508 
509       shared_ptr<const Shell> sh1 = geomdata_->shellpair(i01)->shell(0);
510       const int offset1 = geomdata_->shellpair(i01)->offset(0);
511       const int size1 = sh1->nbasis();
512 
513       double denmax = 0.0;
514       for (int i0 = offset0; i0 != offset0 + size0; ++i0) {
515         const int i0n = i0 * density->ndim();
516         for (int i1 = offset1; i1 != offset1 + size1; ++i1)
517           denmax = max(denmax, fabs(density_data[i0n + i1]));
518       }
519       (*maxden)(i01) = denmax;
520     }
521 
522     for (int i = 0; i != nbranch_[0]; ++i)
523       if (i % mpi__->size() == mpi__->rank()) {
524         auto ei = box_[i]->compute_Fock_nf_K(density, maxden);
525         blas::ax_plus_y_n(1.0, ei->data(), nbasis_*nbasis_, out->data());
526       }
527     out->allreduce();
528 
529     for (int i = 0; i != nbasis_; ++i) out->element(i, i) *= 2.0;
530     out->fill_upper();
531   }
532 
533   nftime.tick_print("near-field-K");
534 
535   return out;
536 }
537 
538 
compute_Fock_FMM_J(shared_ptr<const Matrix> density) const539 shared_ptr<const Matrix> FMM::compute_Fock_FMM_J(shared_ptr<const Matrix> density) const {
540 
541   auto out = make_shared<Matrix>(nbasis_, nbasis_);
542   out->zero();
543 
544   Timer fmmtime;
545   M2M(density);
546   M2L();
547   L2L();
548 
549   Timer jtime;
550 
551   if (density) {
552     assert(nbasis_ == density->ndim());
553     auto maxden = make_shared<VectorB>(geomdata_->nshellpair());
554     const double* density_data = density->data();
555     for (int i01 = 0; i01 != geomdata_->nshellpair(); ++i01) {
556       shared_ptr<const Shell> sh0 = geomdata_->shellpair(i01)->shell(1);
557       const int offset0 = geomdata_->shellpair(i01)->offset(1);
558       const int size0 = sh0->nbasis();
559 
560       shared_ptr<const Shell> sh1 = geomdata_->shellpair(i01)->shell(0);
561       const int offset1 = geomdata_->shellpair(i01)->offset(0);
562       const int size1 = sh1->nbasis();
563 
564       double denmax = 0.0;
565       for (int i0 = offset0; i0 != offset0 + size0; ++i0) {
566         const int i0n = i0 * density->ndim();
567         for (int i1 = offset1; i1 != offset1 + size1; ++i1)
568           denmax = max(denmax, fabs(density_data[i0n + i1]));
569       }
570       (*maxden)(i01) = denmax;
571     }
572 
573     auto ff = make_shared<Matrix>(nbasis_, nbasis_);
574     for (int i = 0; i != nbranch_[0]; ++i)
575       if (i % mpi__->size() == mpi__->rank()) {
576         auto ffi = box_[i]->compute_Fock_ff(density);
577         blas::ax_plus_y_n(1.0, ffi->data(), nbasis_*nbasis_, ff->data());
578       }
579     ff->allreduce();
580     fmmtime.tick_print("FMM-J");
581 
582     for (int i = 0; i != nbranch_[0]; ++i)
583       if (i % mpi__->size() == mpi__->rank()) {
584         auto ei = box_[i]->compute_Fock_nf_J(density, maxden);
585         blas::ax_plus_y_n(1.0, ei->data(), nbasis_*nbasis_, out->data());
586       }
587     out->allreduce();
588 
589     const double enj = 0.5*density->dot_product(*ff);
590     cout << "    o  Far-field Coulomb energy: " << setprecision(9) << enj << endl;
591 
592     for (int i = 0; i != nbasis_; ++i) out->element(i, i) *= 2.0;
593     out->fill_upper();
594     ff->fill_lower();
595 
596     *out += *ff;
597   }
598 
599   jtime.tick_print("J build");
600 
601   return out;
602 }
603