1 /*
2 * This source code is part of
3 *
4 * E R K A L E
5 * -
6 * DFT from Hel
7 *
8 * Written by Susi Lehtola, 2010-2011
9 * Copyright (c) 2010-2011, Susi Lehtola
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 2
14 * of the License, or (at your option) any later version.
15 */
16
17 #include "density_fitting.h"
18 #include "linalg.h"
19 #include "scf.h"
20 #include "stringutil.h"
21 #include <sstream>
22
23 #ifdef _OPENMP
24 #include <omp.h>
25 #endif
26
27 // \delta parameter in Eichkorn et al
28 #define DELTA 1e-9
29
DensityFit()30 DensityFit::DensityFit() {
31 omega=0.0;
32 alpha=1.0;
33 beta=0.0;
34 }
35
~DensityFit()36 DensityFit::~DensityFit() {
37 }
38
set_range_separation(double w,double a,double b)39 void DensityFit::set_range_separation(double w, double a, double b) {
40 omega=w;
41 alpha=a;
42 beta=b;
43 }
44
get_range_separation(double & w,double & a,double & b) const45 void DensityFit::get_range_separation(double & w, double & a, double & b) const {
46 w=omega;
47 a=alpha;
48 b=beta;
49 }
50
Bmat_enabled() const51 bool DensityFit::Bmat_enabled() const {
52 return Bmat;
53 }
54
fill(const BasisSet & orbbas,const BasisSet & auxbas,bool dir,double erithr,double linthr,bool bmat)55 size_t DensityFit::fill(const BasisSet & orbbas, const BasisSet & auxbas, bool dir, double erithr, double linthr, bool bmat) {
56 // Construct density fitting basis
57
58 // Store amount of functions
59 Nbf=orbbas.get_Nbf();
60 Naux=auxbas.get_Nbf();
61 Nnuc=orbbas.get_Nnuc();
62 direct=dir;
63 Bmat=bmat;
64
65 // Fill list of shell pairs
66 arma::mat Q, M;
67 orbpairs=orbbas.get_eripairs(Q,M,erithr);
68
69 // Get orbital shells, auxiliary shells and dummy shell
70 orbshells=orbbas.get_shells();
71 auxshells=auxbas.get_shells();
72 dummy=dummyshell();
73
74 maxorbam=orbbas.get_max_am();
75 maxauxam=auxbas.get_max_am();
76 maxorbcontr=orbbas.get_max_Ncontr();
77 maxauxcontr=auxbas.get_max_Ncontr();
78 maxam=std::max(orbbas.get_max_am(),auxbas.get_max_am());
79 maxcontr=std::max(orbbas.get_max_Ncontr(),auxbas.get_max_Ncontr());
80
81 // First, compute the two-center integrals
82 ab.zeros(Naux,Naux);
83
84 // Get list of unique auxiliary shell pairs
85 std::vector<shellpair_t> auxpairs=auxbas.get_unique_shellpairs();
86
87 #ifdef _OPENMP
88 #pragma omp parallel
89 #endif
90 {
91 ERIWorker *eri;
92 if(omega==0.0 && alpha==1.0 && beta==0.0)
93 eri=new ERIWorker(maxam,maxcontr);
94 else
95 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
96 const std::vector<double> * erip;
97
98 #ifdef _OPENMP
99 #pragma omp for schedule(dynamic)
100 #endif
101 for(size_t ip=0;ip<auxpairs.size();ip++) {
102 // The shells in question are
103 size_t is=auxpairs[ip].is;
104 size_t js=auxpairs[ip].js;
105
106 // Compute (a|b)
107 eri->compute(&auxshells[is],&dummy,&auxshells[js],&dummy);
108 erip=eri->getp();
109
110 // Store integrals
111 size_t Ni=auxshells[is].get_Nbf();
112 size_t Nj=auxshells[js].get_Nbf();
113 for(size_t ii=0;ii<Ni;ii++) {
114 size_t ai=auxshells[is].get_first_ind()+ii;
115 for(size_t jj=0;jj<Nj;jj++) {
116 size_t aj=auxshells[js].get_first_ind()+jj;
117
118 ab(ai,aj)=(*erip)[ii*Nj+jj];
119 ab(aj,ai)=(*erip)[ii*Nj+jj];
120 }
121 }
122 }
123
124 delete eri;
125 }
126
127 if(Bmat) {
128 // Form ab^-1 and ab^-1/2
129 arma::mat abvec;
130 arma::vec abval;
131 eig_sym_ordered(abval,abvec,ab);
132
133 // Count linearly independent vectors
134 size_t Nind=0;
135 for(size_t i=0;i<abval.n_elem;i++)
136 if(abval(i)>=linthr)
137 Nind++;
138
139 // and drop the linearly dependent ones
140 abval=abval.subvec(abval.n_elem-Nind,abval.n_elem-1);
141 abvec=abvec.cols(abvec.n_cols-Nind,abvec.n_cols-1);
142
143 // Form matrices
144 ab_inv.zeros(abvec.n_rows,abvec.n_rows);
145 ab_invh.zeros(abvec.n_rows,abvec.n_rows);
146 for(size_t i=0;i<abval.n_elem;i++) {
147 ab_inv+=abvec.col(i)*arma::trans(abvec.col(i))/abval(i);
148 ab_invh+=abvec.col(i)*arma::trans(abvec.col(i))/sqrt(abval(i));
149 }
150 } else {
151 // Just RI-J(K), so use faster method from Eichkorn et al to form ab_inv only
152 ab_inv=arma::inv(ab + DELTA*arma::eye(ab.n_rows,ab.n_cols));
153 }
154
155 // Then, compute the three-center integrals
156 if(!direct) {
157 a_munu.resize(orbpairs.size());
158 #ifdef _OPENMP
159 #pragma omp parallel
160 #endif
161 {
162 ERIWorker *eri;
163 if(omega==0.0 && alpha==1.0 && beta==0.0)
164 eri=new ERIWorker(maxam,maxcontr);
165 else
166 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
167
168 #ifdef _OPENMP
169 #pragma omp for schedule(dynamic)
170 #endif
171 for(size_t ip=0;ip<orbpairs.size();ip++)
172 a_munu[ip]=compute_a_munu(eri,ip);
173
174 delete eri;
175 }
176 }
177
178 return orbpairs.size();
179 }
180
compute_a_munu(ERIWorker * eri,size_t ip) const181 arma::mat DensityFit::compute_a_munu(ERIWorker *eri, size_t ip) const {
182 // Shells in question are
183 size_t imus=orbpairs[ip].is;
184 size_t inus=orbpairs[ip].js;
185 // Amount of functions
186 size_t Nmu=orbshells[imus].get_Nbf();
187 size_t Nnu=orbshells[inus].get_Nbf();
188
189 // Allocate storage
190 arma::mat amunu;
191 amunu.zeros(Naux,Nmu*Nnu);
192 #ifdef _OPENMP
193 #pragma omp parallel for schedule(dynamic)
194 #endif
195 for(size_t ia=0;ia<auxshells.size();ia++) {
196 // Number of functions on shell
197 size_t Na=auxshells[ia].get_Nbf();
198 size_t a0=auxshells[ia].get_first_ind();
199
200 // Compute (a|vu)
201 eri->compute(&auxshells[ia],&dummy,&orbshells[inus],&orbshells[imus]);
202 const std::vector<double> * erip(eri->getp());
203
204 // Store integrals
205 for(size_t a=0;a<Na;a++)
206 for(size_t imunu=0;imunu<Nmu*Nnu;imunu++)
207 // Use Fortran ordering so it's compatible with Armadillo
208 //amunu(a0+a,nu*Nmu+mu)=(*erip)[(a*Nnu+nu)*Nmu+mu];
209 amunu(a0+a,imunu)=(*erip)[a*Nnu*Nmu+imunu];
210 }
211
212 return amunu;
213 }
214
digest_Jexp(const arma::mat & P,size_t ip,const arma::mat & amunu,arma::vec & gamma) const215 void DensityFit::digest_Jexp(const arma::mat & P, size_t ip, const arma::mat & amunu, arma::vec & gamma) const {
216 if(P.n_rows != Nbf || P.n_cols != Nbf) {
217 std::ostringstream oss;
218 oss << "Error in DensityFit: Nbf = " << Nbf << ", P.n_rows = " << P.n_rows << ", P.n_cols = " << P.n_cols << "!\n";
219 throw std::logic_error(oss.str());
220 }
221
222 // Shells in question are
223 size_t imus=orbpairs[ip].is;
224 size_t inus=orbpairs[ip].js;
225 // First function on shell
226 size_t mubeg=orbshells[imus].get_first_ind();
227 size_t nubeg=orbshells[inus].get_first_ind();
228 // Amount of functions
229 size_t muend=orbshells[imus].get_last_ind();
230 size_t nuend=orbshells[inus].get_last_ind();
231
232 // Density submatrix
233 arma::vec Psub;
234 if(imus != inus)
235 // On the off-diagonal we're twice degenerate
236 Psub=2.0*arma::vectorise(P.submat(mubeg,nubeg,muend,nuend));
237 else
238 Psub=arma::vectorise(P.submat(mubeg,nubeg,muend,nuend));
239
240 // Contract over indices
241 gamma+=amunu*Psub;
242 }
243
digest_J(const arma::mat & gamma,size_t ip,const arma::mat & amunu,arma::mat & J) const244 void DensityFit::digest_J(const arma::mat & gamma, size_t ip, const arma::mat & amunu, arma::mat & J) const {
245 // Shells in question are
246 size_t imus=orbpairs[ip].is;
247 size_t inus=orbpairs[ip].js;
248 // First function on shell
249 size_t mu0=orbshells[imus].get_first_ind();
250 size_t nu0=orbshells[inus].get_first_ind();
251 // Amount of functions
252 size_t Nmu=orbshells[imus].get_Nbf();
253 size_t Nnu=orbshells[inus].get_Nbf();
254
255 // vec(uv) = c_a (a,uv)
256 arma::mat Jsub(arma::trans(gamma)*amunu);
257 // Reshape into matrix
258 Jsub.reshape(Nmu,Nnu);
259
260 J.submat(mu0,nu0,mu0+Nmu-1,nu0+Nnu-1)=Jsub;
261 J.submat(nu0,mu0,nu0+Nnu-1,mu0+Nmu-1)=arma::trans(Jsub);
262 }
263
digest_K_incore(const arma::mat & C,const arma::vec & occs,arma::mat & K) const264 void DensityFit::digest_K_incore(const arma::mat & C, const arma::vec & occs, arma::mat & K) const {
265 if(C.n_rows != Nbf) {
266 std::ostringstream oss;
267 oss << "Error in DensityFit: Nbf = " << Nbf << ", C.n_rows = " << C.n_rows << "!\n";
268 throw std::logic_error(oss.str());
269 }
270
271 // a_munu contains (a,uv). We want to build the exchange
272 // K_uv = \sum_i n_i (ui|vi) = \sum_i n_i (a|ui) (a|b)^-1 (b|vi)
273 for(size_t io=0;io<C.n_cols;io++) {
274 // Helper array
275 arma::mat aui(Naux,Nbf);
276 aui.zeros();
277
278 // Fill integrals
279 #ifdef _OPENMP
280 #pragma omp parallel
281 #endif
282 {
283 #ifdef _OPENMP
284 arma::mat hlp(aui);
285 #pragma omp for schedule(dynamic)
286 #endif
287 for(size_t ip=0;ip<orbpairs.size();ip++) {
288 // Shells in question are
289 size_t imus=orbpairs[ip].is;
290 size_t inus=orbpairs[ip].js;
291 // First function on shell
292 size_t mu0=orbshells[imus].get_first_ind();
293 size_t nu0=orbshells[inus].get_first_ind();
294 // Amount of functions
295 size_t Nmu=orbshells[imus].get_Nbf();
296 size_t Nnu=orbshells[inus].get_Nbf();
297
298 // The array amunu is stored in the form amunu(a,nu*Nmu+mu) =
299 // amunu[(nu*Nmu+mu)*Naux+a], so we can reshape it to a
300 // (Naux*Nmu,Nnu) matrix. Half-transformed (a|u;i) is then
301 arma::mat ui(arma::reshape(a_munu[ip],Naux*Nmu,Nnu)*C.submat(nu0,io,nu0+Nnu-1,io));
302 ui.reshape(Naux,Nmu);
303
304 #ifdef _OPENMP
305 hlp.cols(mu0,mu0+Nmu-1)+=ui;
306 #else
307 aui.cols(mu0,mu0+Nmu-1)+=ui;
308 #endif
309
310 if(imus != inus) {
311 // Get (a|vu)
312 arma::mat anumu(Naux,Nmu*Nnu);
313 anumu.zeros();
314 for(size_t mu=0;mu<Nmu;mu++)
315 for(size_t nu=0;nu<Nnu;nu++)
316 anumu.col(mu*Nnu+nu)=a_munu[ip].col(nu*Nmu+mu);
317
318 // Half-transformed (a|v;i) is
319 arma::mat vi(arma::reshape(anumu,Naux*Nnu,Nmu)*C.submat(mu0,io,mu0+Nmu-1,io));
320 vi.reshape(Naux,Nnu);
321
322 #ifdef _OPENMP
323 hlp.cols(nu0,nu0+Nnu-1)+=vi;
324 #else
325 aui.cols(nu0,nu0+Nnu-1)+=vi;
326 #endif
327 }
328 }
329 #ifdef _OPENMP
330 #pragma omp critical
331 aui+=hlp;
332 #endif
333 }
334
335 // K_uv = (ui|vi) = (a|ui) (a|b)^-1 (b|vi)
336 if(Bmat) {
337 aui = ab_invh*aui;
338 K += occs[io]*arma::trans(aui)*aui;
339 } else {
340 K += occs[io]*arma::trans(aui)*ab_inv*aui;
341 }
342 }
343 }
344
digest_K_incore(const arma::cx_mat & C,const arma::vec & occs,arma::cx_mat & K) const345 void DensityFit::digest_K_incore(const arma::cx_mat & C, const arma::vec & occs, arma::cx_mat & K) const {
346 if(C.n_rows != Nbf) {
347 std::ostringstream oss;
348 oss << "Error in DensityFit: Nbf = " << Nbf << ", C.n_rows = " << C.n_rows << "!\n";
349 throw std::logic_error(oss.str());
350 }
351
352 // a_munu contains (a,uv). We want to build the exchange
353 // K_uv = \sum_i n_i (ui|vi) = \sum_i n_i (a|ui) (a|b)^-1 (b|vi)
354 for(size_t io=0;io<C.n_cols;io++) {
355 // Helper array
356 arma::cx_mat aui(Naux,Nbf);
357 aui.zeros();
358
359 // Fill integrals
360 #ifdef _OPENMP
361 #pragma omp parallel
362 #endif
363 {
364 #ifdef _OPENMP
365 arma::cx_mat hlp(aui);
366 #pragma omp for schedule(dynamic)
367 #endif
368 for(size_t ip=0;ip<orbpairs.size();ip++) {
369 // Shells in question are
370 size_t imus=orbpairs[ip].is;
371 size_t inus=orbpairs[ip].js;
372 // First function on shell
373 size_t mu0=orbshells[imus].get_first_ind();
374 size_t nu0=orbshells[inus].get_first_ind();
375 // Amount of functions
376 size_t Nmu=orbshells[imus].get_Nbf();
377 size_t Nnu=orbshells[inus].get_Nbf();
378
379 // The array amunu is stored in the form amunu(a,nu*Nmu+mu) =
380 // amunu[(nu*Nmu+mu)*Naux+a], so we can reshape it to a
381 // (Naux*Nmu,Nnu) matrix. Half-transformed (a|u;i) is then
382 arma::cx_mat ui(arma::reshape(a_munu[ip],Naux*Nmu,Nnu)*C.submat(nu0,io,nu0+Nnu-1,io));
383 ui.reshape(Naux,Nmu);
384
385 #ifdef _OPENMP
386 hlp.cols(mu0,mu0+Nmu-1)+=ui;
387 #else
388 aui.cols(mu0,mu0+Nmu-1)+=ui;
389 #endif
390
391 if(imus != inus) {
392 // Get (a|vu)
393 arma::mat anumu(Naux,Nmu*Nnu);
394 anumu.zeros();
395 for(size_t mu=0;mu<Nmu;mu++)
396 for(size_t nu=0;nu<Nnu;nu++)
397 anumu.col(mu*Nnu+nu)=a_munu[ip].col(nu*Nmu+mu);
398
399 // Half-transformed (a|v;i) is
400 arma::cx_mat vi(arma::reshape(anumu,Naux*Nnu,Nmu)*C.submat(mu0,io,mu0+Nmu-1,io));
401 vi.reshape(Naux,Nnu);
402
403 #ifdef _OPENMP
404 hlp.cols(nu0,nu0+Nnu-1)+=vi;
405 #else
406 aui.cols(nu0,nu0+Nnu-1)+=vi;
407 #endif
408 }
409 }
410 #ifdef _OPENMP
411 #pragma omp critical
412 aui+=hlp;
413 #endif
414 }
415
416 // K_uv = (ui|vi) = (a|ui) (a|b)^-1 (b|vi)
417 if(Bmat) {
418 aui = ab_invh*aui;
419 K += occs[io]*arma::trans(aui)*aui;
420 } else {
421 K += occs[io]*arma::trans(aui)*ab_inv*aui;
422 }
423 }
424 }
425
digest_K_direct(const arma::mat & C,const arma::vec & occs,arma::mat & K) const426 void DensityFit::digest_K_direct(const arma::mat & C, const arma::vec & occs, arma::mat & K) const {
427 if(C.n_rows != Nbf) {
428 std::ostringstream oss;
429 oss << "Error in DensityFit: Nbf = " << Nbf << ", C.n_rows = " << C.n_rows << "!\n";
430 throw std::logic_error(oss.str());
431 }
432
433 // a_munu contains (a,uv). We want to build the exchange
434 // K_uv = \sum_i n_i (ui|vi) = \sum_i n_i (a|ui) (a|b)^-1 (b|vi)
435
436 // Stack of helper matrices
437 std::vector<arma::mat> aui(C.n_cols);
438 for(size_t i=0;i<aui.size();i++)
439 aui[i].zeros(Naux,Nbf);
440
441 #ifdef _OPENMP
442 #pragma omp parallel
443 #endif
444 {
445 // Thread-private helper
446 #ifdef _OPENMP
447 std::vector<arma::mat> auithr(aui);
448 #endif
449
450 ERIWorker *eri;
451 if(omega==0.0 && alpha==1.0 && beta==0.0)
452 eri=new ERIWorker(maxam,maxcontr);
453 else
454 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
455
456 #ifdef _OPENMP
457 #pragma omp for schedule(dynamic)
458 #endif
459 for(size_t ip=0;ip<orbpairs.size();ip++) {
460 // Calculate integrals
461 arma::mat amunu(compute_a_munu(eri,ip));
462 // Shells in question are
463 size_t imus=orbpairs[ip].is;
464 size_t inus=orbpairs[ip].js;
465 // First function on shell
466 size_t mu0=orbshells[imus].get_first_ind();
467 size_t nu0=orbshells[inus].get_first_ind();
468 // Amount of functions
469 size_t Nmu=orbshells[imus].get_Nbf();
470 size_t Nnu=orbshells[inus].get_Nbf();
471
472 // Get (a|vu)
473 arma::mat anumu;
474 if(imus!=inus) {
475 anumu.zeros(Naux,Nmu*Nnu);
476 for(size_t mu=0;mu<Nmu;mu++)
477 for(size_t nu=0;nu<Nnu;nu++)
478 anumu.col(mu*Nnu+nu)=amunu.col(nu*Nmu+mu);
479 }
480
481 // Loop over orbitals
482 for(size_t io=0;io<C.n_cols;io++) {
483 // The array amunu is stored in the form amunu(a,nu*Nmu+mu) =
484 // amunu[(nu*Nmu+mu)*Naux+a], so we can reshape it to a
485 // (Naux*Nmu,Nnu) matrix. Half-transformed (a|u;i) is then
486 arma::mat ui(arma::reshape(amunu,Naux*Nmu,Nnu)*C.submat(nu0,io,nu0+Nnu-1,io));
487 ui.reshape(Naux,Nmu);
488
489 #ifdef _OPENMP
490 auithr[io].cols(mu0,mu0+Nmu-1)+=ui;
491 #else
492 aui[io].cols(mu0,mu0+Nmu-1)+=ui;
493 #endif
494
495 if(imus != inus) {
496 // Half-transformed (a|v;i) is
497 arma::mat vi(arma::reshape(anumu,Naux*Nnu,Nmu)*C.submat(mu0,io,mu0+Nmu-1,io));
498 vi.reshape(Naux,Nnu);
499
500 #ifdef _OPENMP
501 auithr[io].cols(nu0,nu0+Nnu-1)+=vi;
502 #else
503 aui[io].cols(nu0,nu0+Nnu-1)+=vi;
504 #endif
505 }
506 }
507 }
508
509 #ifdef _OPENMP
510 #pragma omp critical
511 for(size_t io=0;io<C.n_cols;io++)
512 aui[io]+=auithr[io];
513 #endif
514 }
515
516 // K_uv = (ui|vi) = (a|ui) (a|b)^-1 (b|vi)
517 if(Bmat) {
518 for(size_t io=0;io<C.n_cols;io++) {
519 aui[io] = ab_invh*aui[io];
520 K += occs[io]*arma::trans(aui[io])*aui[io];
521 }
522 } else {
523 for(size_t io=0;io<C.n_cols;io++) {
524 K += occs[io]*arma::trans(aui[io])*ab_inv*aui[io];
525 }
526 }
527 }
528
memory_estimate(const BasisSet & orbbas,const BasisSet & auxbas,double thr,bool dir) const529 size_t DensityFit::memory_estimate(const BasisSet & orbbas, const BasisSet & auxbas, double thr, bool dir) const {
530 // Amount of auxiliary functions (for representing the electron density)
531 size_t Na=auxbas.get_Nbf();
532 // Amount of memory required for calculation
533 size_t Nmem=0;
534
535 // Memory taken up by ( \alpha | \mu \nu)
536 if(!dir) {
537 // Form screening matrix
538 arma::mat Q, M;
539 std::vector<eripair_t> opairs=orbbas.get_eripairs(Q,M,thr);
540
541 // Count number of function pairs
542 size_t np=0;
543 for(size_t ip=0;ip<opairs.size();ip++)
544 np+=orbbas.get_Nbf(opairs[ip].is)*orbbas.get_Nbf(opairs[ip].js);
545 Nmem+=Na*np*sizeof(double);
546 }
547
548 // Memory taken by (\alpha | \beta) and its inverse
549 Nmem+=2*Na*Na*sizeof(double);
550 if(Bmat)
551 // We also have (a|b)^(-1/2)
552 Nmem+=Na*Na*sizeof(double);
553
554 // Memory taken by gamma and expansion coefficients
555 Nmem+=2*Na*sizeof(double);
556
557 return Nmem;
558 }
559
compute_expansion(const arma::mat & P) const560 arma::vec DensityFit::compute_expansion(const arma::mat & P) const {
561 if(P.n_rows != Nbf || P.n_cols != Nbf) {
562 std::ostringstream oss;
563 oss << "Error in DensityFit: Nbf = " << Nbf << ", P.n_rows = " << P.n_rows << ", P.n_cols = " << P.n_cols << "!\n";
564 throw std::logic_error(oss.str());
565 }
566
567 arma::vec gamma(Naux);
568 gamma.zeros();
569
570 // Compute gamma
571 if(!direct) {
572 #ifdef _OPENMP
573 #pragma omp parallel
574 {
575 // Thread-private helper
576 arma::vec gv(Naux);
577 gv.zeros();
578 #pragma omp for schedule(dynamic)
579 for(size_t ip=0;ip<orbpairs.size();ip++) {
580 digest_Jexp(P,ip,a_munu[ip],gv);
581 }
582 #pragma omp critical
583 gamma+=gv;
584 }
585 #else
586 // Sequential code
587 for(size_t ip=0;ip<orbpairs.size();ip++)
588 digest_Jexp(P,ip,a_munu[ip],gamma);
589 #endif
590
591 } else {
592 #ifdef _OPENMP
593 #pragma omp parallel
594 {
595 ERIWorker *eri;
596 if(omega==0.0 && alpha==1.0 && beta==0.0)
597 eri=new ERIWorker(maxam,maxcontr);
598 else
599 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
600
601 arma::vec gv(gamma);
602 #pragma omp for schedule(dynamic)
603 for(size_t ip=0;ip<orbpairs.size();ip++)
604 digest_Jexp(P,ip,compute_a_munu(eri,ip),gv);
605 #pragma omp critical
606 gamma+=gv;
607
608 delete eri;
609 }
610 #else
611 // Sequential code
612
613 ERIWorker *eri;
614 if(omega==0.0 && alpha==1.0 && beta==0.0)
615 eri=new ERIWorker(maxam,maxcontr);
616 else
617 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
618
619 for(size_t ip=0;ip<orbpairs.size();ip++)
620 digest_Jexp(P,ip,compute_a_munu(eri,ip),gamma);
621
622 delete eri;
623 #endif
624 }
625
626 // Compute and return c
627 if(Bmat) {
628 return ab_inv*gamma;
629 } else {
630 // Compute x0
631 arma::vec x0=ab_inv*gamma;
632 // Compute and return c
633 return x0+ab_inv*(gamma-ab*x0);
634 }
635 }
636
compute_expansion(const std::vector<arma::mat> & P) const637 std::vector<arma::vec> DensityFit::compute_expansion(const std::vector<arma::mat> & P) const {
638 for(size_t i=0;i<P.size();i++) {
639 if(P[i].n_rows != Nbf || P[i].n_cols != Nbf) {
640 std::ostringstream oss;
641 oss << "Error in DensityFit: Nbf = " << Nbf << ", P[" << i << "].n_rows = " << P[i].n_rows << ", P[" << i << "].n_cols = " << P[i].n_cols << "!\n";
642 throw std::logic_error(oss.str());
643 }
644 }
645
646 std::vector<arma::vec> gamma(P.size());
647 for(size_t i=0;i<P.size();i++)
648 gamma[i].zeros(Naux);
649
650 // Compute gamma
651 if(!direct) {
652 for(size_t iden=0;iden<P.size();iden++) {
653 #ifdef _OPENMP
654 #pragma omp parallel
655 #endif
656 {
657 // Thread-private helper
658 #ifdef _OPENMP
659 arma::vec gv(Naux);
660 gv.zeros();
661 #pragma omp for schedule(dynamic)
662 #endif
663 for(size_t ip=0;ip<orbpairs.size();ip++) {
664 #ifdef _OPENMP
665 digest_Jexp(P[iden],ip,a_munu[ip],gv);
666 #else
667 digest_Jexp(P[iden],ip,a_munu[ip],gamma[iden]);
668 #endif
669 }
670 #ifdef _OPENMP
671 #pragma omp critical
672 gamma[iden]+=gv;
673 #endif
674 }
675 }
676
677 } else {
678 for(size_t iden=0;iden<P.size();iden++) {
679 #ifdef _OPENMP
680 #pragma omp parallel
681 #endif
682 {
683 ERIWorker *eri;
684 if(omega==0.0 && alpha==1.0 && beta==0.0)
685 eri=new ERIWorker(maxam,maxcontr);
686 else
687 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
688
689 #ifdef _OPENMP
690 // Worker stack for each matrix
691 arma::vec gv(gamma[iden]);
692 #pragma omp for schedule(dynamic)
693 #endif
694 for(size_t ip=0;ip<orbpairs.size();ip++) {
695 #ifdef _OPENMP
696 digest_Jexp(P[iden],ip,compute_a_munu(eri,ip),gv);
697 #else
698 digest_Jexp(P[iden],ip,compute_a_munu(eri,ip),gamma[iden]);
699 #endif
700 }
701 #ifdef _OPENMP
702 #pragma omp critical
703 gamma[iden]+=gv;
704 #endif
705
706 delete eri;
707 }
708 }
709 }
710
711 // Compute and return c
712 if(Bmat) {
713 for(size_t ig=0;ig<P.size();ig++)
714 gamma[ig]=ab_inv*gamma[ig];
715
716 } else {
717 for(size_t ig=0;ig<P.size();ig++) {
718 // Compute x0
719 arma::vec x0=ab_inv*gamma[ig];
720 // Compute and return c
721 gamma[ig]=x0+ab_inv*(gamma[ig]-ab*x0);
722 }
723 }
724
725 return gamma;
726 }
727
calcJ(const arma::mat & P) const728 arma::mat DensityFit::calcJ(const arma::mat & P) const {
729 if(P.n_rows != Nbf || P.n_cols != Nbf) {
730 std::ostringstream oss;
731 oss << "Error in DensityFit: Nbf = " << Nbf << ", P.n_rows = " << P.n_rows << ", P.n_cols = " << P.n_cols << "!\n";
732 throw std::logic_error(oss.str());
733 }
734
735 // Get the expansion coefficients
736 arma::vec c=compute_expansion(P);
737
738 arma::mat J(Nbf,Nbf);
739 J.zeros();
740
741 if(!direct) {
742 #ifdef _OPENMP
743 #pragma omp parallel for schedule(dynamic)
744 #endif
745 for(size_t ip=0;ip<orbpairs.size();ip++)
746 digest_J(c,ip,a_munu[ip],J);
747
748 } else {
749 #ifdef _OPENMP
750 #pragma omp parallel
751 #endif
752 {
753 ERIWorker *eri;
754 if(omega==0.0 && alpha==1.0 && beta==0.0)
755 eri=new ERIWorker(maxam,maxcontr);
756 else
757 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
758
759 #ifdef _OPENMP
760 #pragma omp for
761 #endif
762 for(size_t ip=0;ip<orbpairs.size();ip++)
763 digest_J(c,ip,compute_a_munu(eri,ip),J);
764
765 delete eri;
766 }
767 }
768
769 return J;
770 }
771
calcJ(const std::vector<arma::mat> & P) const772 std::vector<arma::mat> DensityFit::calcJ(const std::vector<arma::mat> & P) const {
773 // Get the expansion coefficients
774 std::vector<arma::vec> c=compute_expansion(P);
775
776 std::vector<arma::mat> J(P.size());
777 for(size_t iden=0;iden<P.size();iden++)
778 J[iden].zeros(Nbf,Nbf);
779
780 if(!direct) {
781 #ifdef _OPENMP
782 #pragma omp parallel for schedule(dynamic)
783 #endif
784 for(size_t ip=0;ip<orbpairs.size();ip++)
785 for(size_t iden=0;iden<P.size();iden++)
786 digest_J(c[iden],ip,a_munu[ip],J[iden]);
787
788 } else {
789 #ifdef _OPENMP
790 #pragma omp parallel
791 #endif
792 {
793 ERIWorker *eri;
794 if(omega==0.0 && alpha==1.0 && beta==0.0)
795 eri=new ERIWorker(maxam,maxcontr);
796 else
797 eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
798
799 #ifdef _OPENMP
800 #pragma omp for schedule(dynamic)
801 #endif
802 for(size_t ip=0;ip<orbpairs.size();ip++) {
803 arma::mat amunu(compute_a_munu(eri,ip));
804 for(size_t iden=0;iden<P.size();iden++)
805 digest_J(c[iden],ip,amunu,J[iden]);
806 }
807
808 delete eri;
809 }
810 }
811
812 return J;
813 }
814
forceJ(const arma::mat & P)815 arma::vec DensityFit::forceJ(const arma::mat & P) {
816 // First, compute the expansion
817 arma::vec c=compute_expansion(P);
818
819 // The force
820 arma::vec f(3*Nnuc);
821 f.zeros();
822
823 // First part: f = *#* 1/2 c_a (a|b)' c_b *#* - gamma_a' c_a
824 #ifdef _OPENMP
825 #pragma omp parallel
826 #endif
827 {
828 dERIWorker *deri;
829 if(omega==0.0 && alpha==1.0 && beta==0.0)
830 deri=new dERIWorker(maxam,maxcontr);
831 else
832 deri=new dERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
833 const std::vector<double> * erip;
834
835 #ifdef _OPENMP
836 // Worker stack for each matrix
837 arma::vec fwrk(f);
838 fwrk.zeros();
839
840 #pragma omp for schedule(dynamic)
841 #endif
842 for(size_t ias=0;ias<auxshells.size();ias++)
843 for(size_t jas=0;jas<=ias;jas++) {
844
845 // Symmetry factor
846 double fac=0.5;
847 if(ias!=jas)
848 fac*=2.0;
849
850 size_t Na=auxshells[ias].get_Nbf();
851 size_t anuc=auxshells[ias].get_center_ind();
852 size_t Nb=auxshells[jas].get_Nbf();
853 size_t bnuc=auxshells[jas].get_center_ind();
854
855 if(anuc==bnuc)
856 // Contributions vanish
857 continue;
858
859 // Compute (a|b)
860 deri->compute(&auxshells[ias],&dummy,&auxshells[jas],&dummy);
861
862 // Compute forces
863 const static int index[]={0, 1, 2, 6, 7, 8};
864 const size_t Nidx=sizeof(index)/sizeof(index[0]);
865 double ders[Nidx];
866
867 for(size_t iid=0;iid<Nidx;iid++) {
868 ders[iid]=0.0;
869 // Index is
870 int ic=index[iid];
871
872 // Increment force, anuc
873 erip=deri->getp(ic);
874 for(size_t iia=0;iia<Na;iia++) {
875 size_t ia=auxshells[ias].get_first_ind()+iia;
876
877 for(size_t iib=0;iib<Nb;iib++) {
878 size_t ib=auxshells[jas].get_first_ind()+iib;
879
880 // The integral is
881 double res=(*erip)[iia*Nb+iib];
882
883 ders[iid]+= res*c(ia)*c(ib);
884 }
885 }
886 ders[iid]*=fac;
887 }
888
889 // Increment forces
890 for(int ic=0;ic<3;ic++) {
891 #ifdef _OPENMP
892 fwrk(3*anuc+ic)+=ders[ic];
893 fwrk(3*bnuc+ic)+=ders[ic+3];
894 #else
895 f(3*anuc+ic)+=ders[ic];
896 f(3*bnuc+ic)+=ders[ic+3];
897 #endif
898 }
899 }
900
901 #ifdef _OPENMP
902 #pragma omp critical
903 // Sum results together
904 f+=fwrk;
905 #endif
906 delete deri;
907 } // end parallel section
908
909
910 // Second part: f = 1/2 c_a (a|b)' c_b *#* - gamma_a' c_a *#*
911 #ifdef _OPENMP
912 #pragma omp parallel
913 #endif
914 {
915 dERIWorker *deri;
916 if(omega==0.0 && alpha==1.0 && beta==0.0)
917 deri=new dERIWorker(maxam,maxcontr);
918 else
919 deri=new dERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
920 const std::vector<double> * erip;
921
922 #ifdef _OPENMP
923 // Worker stack for each matrix
924 arma::vec fwrk(f);
925 fwrk.zeros();
926
927 #pragma omp for schedule(dynamic)
928 #endif
929 for(size_t ip=0;ip<orbpairs.size();ip++) {
930 size_t imus=orbpairs[ip].is;
931 size_t inus=orbpairs[ip].js;
932
933 size_t Nmu=orbshells[imus].get_Nbf();
934 size_t Nnu=orbshells[inus].get_Nbf();
935
936 size_t inuc=orbshells[imus].get_center_ind();
937 size_t jnuc=orbshells[inus].get_center_ind();
938
939 // If imus==inus, we need to take care that we count
940 // every term only once; on the off-diagonal we get
941 // every term twice.
942 double fac=2.0;
943 if(imus==inus)
944 fac=1.0;
945
946 for(size_t ias=0;ias<auxshells.size();ias++) {
947 size_t Na=auxshells[ias].get_Nbf();
948 size_t anuc=auxshells[ias].get_center_ind();
949
950 if(inuc==jnuc && jnuc==anuc)
951 // Contributions vanish
952 continue;
953
954 // Compute (a|mn)
955 deri->compute(&auxshells[ias],&dummy,&orbshells[imus],&orbshells[inus]);
956
957 // Expansion coefficients
958 arma::vec ca=c.subvec(auxshells[ias].get_first_ind(),auxshells[ias].get_last_ind());
959
960 // Compute forces
961 const static int index[]={0, 1, 2, 6, 7, 8, 9, 10, 11};
962 const size_t Nidx=sizeof(index)/sizeof(index[0]);
963 double ders[Nidx];
964
965 for(size_t iid=0;iid<Nidx;iid++) {
966 // Index is
967 int ic=index[iid];
968 arma::vec hlp(Na);
969
970 erip=deri->getp(ic);
971 hlp.zeros();
972 for(size_t iia=0;iia<Na;iia++)
973 for(size_t iimu=0;iimu<Nmu;iimu++) {
974 size_t imu=orbshells[imus].get_first_ind()+iimu;
975 for(size_t iinu=0;iinu<Nnu;iinu++) {
976 size_t inu=orbshells[inus].get_first_ind()+iinu;
977
978 // The contracted integral
979 hlp(iia)+=(*erip)[(iia*Nmu+iimu)*Nnu+iinu]*P(imu,inu);
980 }
981 }
982 ders[iid]=fac*arma::dot(hlp,ca);
983 }
984
985 // Increment forces
986 for(int ic=0;ic<3;ic++) {
987 #ifdef _OPENMP
988 fwrk(3*anuc+ic)-=ders[ic];
989 fwrk(3*inuc+ic)-=ders[ic+3];
990 fwrk(3*jnuc+ic)-=ders[ic+6];
991 #else
992 f(3*anuc+ic)-=ders[ic];
993 f(3*inuc+ic)-=ders[ic+3];
994 f(3*jnuc+ic)-=ders[ic+6];
995 #endif
996 }
997
998 }
999 }
1000
1001 #ifdef _OPENMP
1002 #pragma omp critical
1003 // Sum results together
1004 f+=fwrk;
1005 #endif
1006 delete deri;
1007 } // end parallel section
1008
1009 return f;
1010 }
1011
calcK(const arma::mat & Corig,const std::vector<double> & occo,size_t fitmem) const1012 arma::mat DensityFit::calcK(const arma::mat & Corig, const std::vector<double> & occo, size_t fitmem) const {
1013 if(Corig.n_rows != Nbf) {
1014 std::ostringstream oss;
1015 oss << "Error in DensityFit: Nbf = " << Nbf << ", Corig.n_rows = " << Corig.n_rows << "!\n";
1016 throw std::logic_error(oss.str());
1017 }
1018
1019 // Count number of orbitals
1020 size_t Nmo=0;
1021 for(size_t i=0;i<occo.size();i++)
1022 if(occo[i]>0)
1023 Nmo++;
1024
1025 // Collect orbitals to use
1026 arma::mat C(Nbf,Nmo);
1027 arma::vec occs(Nmo);
1028 {
1029 size_t io=0;
1030 for(size_t i=0;i<occo.size();i++)
1031 if(occo[i]>0) {
1032 // Store orbital and occupation number
1033 C.col(io)=Corig.col(i);
1034 occs[io]=occo[i];
1035 io++;
1036 }
1037 }
1038
1039 // Returned matrix
1040 arma::mat K(Nbf,Nbf);
1041 K.zeros();
1042
1043 if(!direct) {
1044 digest_K_incore(C,occs,K);
1045 } else {
1046 // Determine how many orbitals we can handle at one go. Memory
1047 // needed for one orbital is
1048 size_t oneorb(sizeof(double)*Naux*Nbf);
1049 #ifdef _OPENMP
1050 // Each thread needs its own storage
1051 oneorb*=omp_get_max_threads();
1052 #endif
1053
1054 // Number of orbitals that can be handled in one block is
1055 size_t blocksize(floor(fitmem*1.0/oneorb));
1056 if(blocksize<1) {
1057 std::ostringstream oss;
1058 oss << "Not enough fitting memory! Need at least " << memory_size(oneorb) << " per orbital!\n";
1059 throw std::logic_error(oss.str());
1060 }
1061 // Number of blocks is then
1062 size_t nblocks(ceil(C.n_cols*1.0/blocksize));
1063
1064 //printf("Handling %i orbitals at once.\n",(int) blocksize);
1065
1066 // Loop over blocks
1067 for(size_t iblock=0;iblock<nblocks;iblock++) {
1068 // Block starts at
1069 size_t iobeg(iblock*blocksize);
1070 size_t ioend(std::min((iblock+1)*blocksize-1,(size_t) C.n_cols-1));
1071
1072 //printf("Treating orbitals %i-%i\n",(int) iobeg,(int) ioend);
1073 //fflush(stdout);
1074
1075 digest_K_direct(C.cols(iobeg,ioend),occs.subvec(iobeg,ioend),K);
1076 }
1077 }
1078
1079 return K;
1080 }
1081
calcK(const arma::cx_mat & Corig,const std::vector<double> & occo,size_t fitmem) const1082 arma::cx_mat DensityFit::calcK(const arma::cx_mat & Corig, const std::vector<double> & occo, size_t fitmem) const {
1083 if(Corig.n_rows != Nbf) {
1084 std::ostringstream oss;
1085 oss << "Error in DensityFit: Nbf = " << Nbf << ", Corig.n_rows = " << Corig.n_rows << "!\n";
1086 throw std::logic_error(oss.str());
1087 }
1088
1089 // Count number of orbitals
1090 size_t Nmo=0;
1091 for(size_t i=0;i<occo.size();i++)
1092 if(occo[i]>0)
1093 Nmo++;
1094
1095 // Collect orbitals to use
1096 arma::cx_mat C(Nbf,Nmo);
1097 arma::vec occs(Nmo);
1098 {
1099 size_t io=0;
1100 for(size_t i=0;i<occo.size();i++)
1101 if(occo[i]>0) {
1102 // Store orbital and occupation number
1103 C.col(io)=Corig.col(i);
1104 occs[io]=occo[i];
1105 io++;
1106 }
1107 }
1108
1109 // Returned matrix
1110 arma::cx_mat K(Nbf,Nbf);
1111 K.zeros();
1112
1113 if(!direct) {
1114 digest_K_incore(C,occs,K);
1115 } else {
1116 (void) fitmem;
1117 throw std::logic_error("Direct mode hasn't been implemented for density-fitted complex exchange!\n");
1118 }
1119
1120 return K;
1121 }
1122
get_Norb() const1123 size_t DensityFit::get_Norb() const {
1124 return Nbf;
1125 }
1126
get_Naux() const1127 size_t DensityFit::get_Naux() const {
1128 return Naux;
1129 }
1130
get_ab_inv() const1131 arma::mat DensityFit::get_ab_inv() const {
1132 return ab_inv;
1133 }
1134
B_matrix(arma::mat & B) const1135 void DensityFit::B_matrix(arma::mat & B) const {
1136 if(direct)
1137 throw std::runtime_error("Must run in tabulated mode!\n");
1138 if(!Bmat)
1139 throw std::runtime_error("Must be run in B-matrix mode!\n");
1140
1141 // Collect AO integrals
1142 B.zeros(Nbf*Nbf,Naux);
1143 for(size_t ip=0;ip<orbpairs.size();ip++) {
1144 size_t imus=orbpairs[ip].is;
1145 size_t inus=orbpairs[ip].js;
1146 size_t Nmu=orbshells[imus].get_Nbf();
1147 size_t Nnu=orbshells[inus].get_Nbf();
1148 size_t mu0=orbshells[imus].get_first_ind();
1149 size_t nu0=orbshells[inus].get_first_ind();
1150
1151 const arma::mat & amunu(a_munu[ip]);
1152
1153 for(size_t ias=0;ias<auxshells.size();ias++) {
1154 size_t Na=auxshells[ias].get_Nbf();
1155 size_t a0=auxshells[ias].get_first_ind();
1156
1157 for(size_t imu=0;imu<Nmu;imu++)
1158 for(size_t inu=0;inu<Nnu;inu++)
1159 for(size_t ia=0;ia<Na;ia++) {
1160 size_t mu=imu+mu0;
1161 size_t nu=inu+nu0;
1162 size_t a=ia+a0;
1163
1164 double el(amunu(ia+a0,nu*Nmu+mu));
1165 B(mu*Nbf+nu,a)=el;
1166 B(nu*Nbf+mu,a)=el;
1167 }
1168 }
1169 }
1170 // Transform into proper B matrix
1171 B*=ab_invh;
1172 }
1173