1 /*
2  *                This source code is part of
3  *
4  *                     E  R  K  A  L  E
5  *                             -
6  *                       DFT from Hel
7  *
8  * Written by Susi Lehtola, 2010-2011
9  * Copyright (c) 2010-2011, Susi Lehtola
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public License
13  * as published by the Free Software Foundation; either version 2
14  * of the License, or (at your option) any later version.
15  */
16 
17 #include "density_fitting.h"
18 #include "linalg.h"
19 #include "scf.h"
20 #include "stringutil.h"
21 #include <sstream>
22 
23 #ifdef _OPENMP
24 #include <omp.h>
25 #endif
26 
27 // \delta parameter in Eichkorn et al
28 #define DELTA 1e-9
29 
DensityFit()30 DensityFit::DensityFit() {
31   omega=0.0;
32   alpha=1.0;
33   beta=0.0;
34 }
35 
~DensityFit()36 DensityFit::~DensityFit() {
37 }
38 
set_range_separation(double w,double a,double b)39 void DensityFit::set_range_separation(double w, double a, double b) {
40   omega=w;
41   alpha=a;
42   beta=b;
43 }
44 
get_range_separation(double & w,double & a,double & b) const45 void DensityFit::get_range_separation(double & w, double & a, double & b) const {
46   w=omega;
47   a=alpha;
48   b=beta;
49 }
50 
Bmat_enabled() const51 bool DensityFit::Bmat_enabled() const {
52   return Bmat;
53 }
54 
fill(const BasisSet & orbbas,const BasisSet & auxbas,bool dir,double erithr,double linthr,bool bmat)55 size_t DensityFit::fill(const BasisSet & orbbas, const BasisSet & auxbas, bool dir, double erithr, double linthr, bool bmat) {
56   // Construct density fitting basis
57 
58   // Store amount of functions
59   Nbf=orbbas.get_Nbf();
60   Naux=auxbas.get_Nbf();
61   Nnuc=orbbas.get_Nnuc();
62   direct=dir;
63   Bmat=bmat;
64 
65   // Fill list of shell pairs
66   arma::mat Q, M;
67   orbpairs=orbbas.get_eripairs(Q,M,erithr);
68 
69   // Get orbital shells, auxiliary shells and dummy shell
70   orbshells=orbbas.get_shells();
71   auxshells=auxbas.get_shells();
72   dummy=dummyshell();
73 
74   maxorbam=orbbas.get_max_am();
75   maxauxam=auxbas.get_max_am();
76   maxorbcontr=orbbas.get_max_Ncontr();
77   maxauxcontr=auxbas.get_max_Ncontr();
78   maxam=std::max(orbbas.get_max_am(),auxbas.get_max_am());
79   maxcontr=std::max(orbbas.get_max_Ncontr(),auxbas.get_max_Ncontr());
80 
81   // First, compute the two-center integrals
82   ab.zeros(Naux,Naux);
83 
84   // Get list of unique auxiliary shell pairs
85   std::vector<shellpair_t> auxpairs=auxbas.get_unique_shellpairs();
86 
87 #ifdef _OPENMP
88 #pragma omp parallel
89 #endif
90   {
91     ERIWorker *eri;
92     if(omega==0.0 && alpha==1.0 && beta==0.0)
93       eri=new ERIWorker(maxam,maxcontr);
94     else
95       eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
96     const std::vector<double> * erip;
97 
98 #ifdef _OPENMP
99 #pragma omp for schedule(dynamic)
100 #endif
101     for(size_t ip=0;ip<auxpairs.size();ip++) {
102       // The shells in question are
103       size_t is=auxpairs[ip].is;
104       size_t js=auxpairs[ip].js;
105 
106       // Compute (a|b)
107       eri->compute(&auxshells[is],&dummy,&auxshells[js],&dummy);
108       erip=eri->getp();
109 
110       // Store integrals
111       size_t Ni=auxshells[is].get_Nbf();
112       size_t Nj=auxshells[js].get_Nbf();
113       for(size_t ii=0;ii<Ni;ii++) {
114 	size_t ai=auxshells[is].get_first_ind()+ii;
115 	for(size_t jj=0;jj<Nj;jj++) {
116 	  size_t aj=auxshells[js].get_first_ind()+jj;
117 
118 	  ab(ai,aj)=(*erip)[ii*Nj+jj];
119 	  ab(aj,ai)=(*erip)[ii*Nj+jj];
120 	}
121       }
122     }
123 
124     delete eri;
125   }
126 
127   if(Bmat) {
128     // Form ab^-1 and ab^-1/2
129     arma::mat abvec;
130     arma::vec abval;
131     eig_sym_ordered(abval,abvec,ab);
132 
133     // Count linearly independent vectors
134     size_t Nind=0;
135     for(size_t i=0;i<abval.n_elem;i++)
136       if(abval(i)>=linthr)
137 	Nind++;
138 
139     // and drop the linearly dependent ones
140     abval=abval.subvec(abval.n_elem-Nind,abval.n_elem-1);
141     abvec=abvec.cols(abvec.n_cols-Nind,abvec.n_cols-1);
142 
143     // Form matrices
144     ab_inv.zeros(abvec.n_rows,abvec.n_rows);
145     ab_invh.zeros(abvec.n_rows,abvec.n_rows);
146     for(size_t i=0;i<abval.n_elem;i++) {
147       ab_inv+=abvec.col(i)*arma::trans(abvec.col(i))/abval(i);
148       ab_invh+=abvec.col(i)*arma::trans(abvec.col(i))/sqrt(abval(i));
149     }
150   } else {
151     // Just RI-J(K), so use faster method from Eichkorn et al to form ab_inv only
152     ab_inv=arma::inv(ab + DELTA*arma::eye(ab.n_rows,ab.n_cols));
153   }
154 
155   // Then, compute the three-center integrals
156   if(!direct) {
157     a_munu.resize(orbpairs.size());
158 #ifdef _OPENMP
159 #pragma omp parallel
160 #endif
161     {
162       ERIWorker *eri;
163       if(omega==0.0 && alpha==1.0 && beta==0.0)
164 	eri=new ERIWorker(maxam,maxcontr);
165       else
166 	eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
167 
168 #ifdef _OPENMP
169 #pragma omp for schedule(dynamic)
170 #endif
171       for(size_t ip=0;ip<orbpairs.size();ip++)
172 	a_munu[ip]=compute_a_munu(eri,ip);
173 
174       delete eri;
175     }
176   }
177 
178   return orbpairs.size();
179 }
180 
compute_a_munu(ERIWorker * eri,size_t ip) const181 arma::mat DensityFit::compute_a_munu(ERIWorker *eri, size_t ip) const {
182   // Shells in question are
183   size_t imus=orbpairs[ip].is;
184   size_t inus=orbpairs[ip].js;
185   // Amount of functions
186   size_t Nmu=orbshells[imus].get_Nbf();
187   size_t Nnu=orbshells[inus].get_Nbf();
188 
189   // Allocate storage
190   arma::mat amunu;
191   amunu.zeros(Naux,Nmu*Nnu);
192 #ifdef _OPENMP
193 #pragma omp parallel for schedule(dynamic)
194 #endif
195   for(size_t ia=0;ia<auxshells.size();ia++) {
196     // Number of functions on shell
197     size_t Na=auxshells[ia].get_Nbf();
198     size_t a0=auxshells[ia].get_first_ind();
199 
200     // Compute (a|vu)
201     eri->compute(&auxshells[ia],&dummy,&orbshells[inus],&orbshells[imus]);
202     const std::vector<double> * erip(eri->getp());
203 
204     // Store integrals
205     for(size_t a=0;a<Na;a++)
206       for(size_t imunu=0;imunu<Nmu*Nnu;imunu++)
207 	// Use Fortran ordering so it's compatible with Armadillo
208 	//amunu(a0+a,nu*Nmu+mu)=(*erip)[(a*Nnu+nu)*Nmu+mu];
209 	amunu(a0+a,imunu)=(*erip)[a*Nnu*Nmu+imunu];
210   }
211 
212   return amunu;
213 }
214 
digest_Jexp(const arma::mat & P,size_t ip,const arma::mat & amunu,arma::vec & gamma) const215 void DensityFit::digest_Jexp(const arma::mat & P, size_t ip, const arma::mat & amunu, arma::vec & gamma) const {
216   if(P.n_rows != Nbf || P.n_cols != Nbf) {
217     std::ostringstream oss;
218     oss << "Error in DensityFit: Nbf = " << Nbf << ", P.n_rows = " << P.n_rows << ", P.n_cols = " << P.n_cols << "!\n";
219     throw std::logic_error(oss.str());
220   }
221 
222   // Shells in question are
223   size_t imus=orbpairs[ip].is;
224   size_t inus=orbpairs[ip].js;
225   // First function on shell
226   size_t mubeg=orbshells[imus].get_first_ind();
227   size_t nubeg=orbshells[inus].get_first_ind();
228   // Amount of functions
229   size_t muend=orbshells[imus].get_last_ind();
230   size_t nuend=orbshells[inus].get_last_ind();
231 
232   // Density submatrix
233   arma::vec Psub;
234   if(imus != inus)
235     // On the off-diagonal we're twice degenerate
236     Psub=2.0*arma::vectorise(P.submat(mubeg,nubeg,muend,nuend));
237   else
238     Psub=arma::vectorise(P.submat(mubeg,nubeg,muend,nuend));
239 
240   // Contract over indices
241   gamma+=amunu*Psub;
242 }
243 
digest_J(const arma::mat & gamma,size_t ip,const arma::mat & amunu,arma::mat & J) const244 void DensityFit::digest_J(const arma::mat & gamma, size_t ip, const arma::mat & amunu, arma::mat & J) const {
245   // Shells in question are
246   size_t imus=orbpairs[ip].is;
247   size_t inus=orbpairs[ip].js;
248   // First function on shell
249   size_t mu0=orbshells[imus].get_first_ind();
250   size_t nu0=orbshells[inus].get_first_ind();
251   // Amount of functions
252   size_t Nmu=orbshells[imus].get_Nbf();
253   size_t Nnu=orbshells[inus].get_Nbf();
254 
255   // vec(uv) = c_a (a,uv)
256   arma::mat Jsub(arma::trans(gamma)*amunu);
257   // Reshape into matrix
258   Jsub.reshape(Nmu,Nnu);
259 
260   J.submat(mu0,nu0,mu0+Nmu-1,nu0+Nnu-1)=Jsub;
261   J.submat(nu0,mu0,nu0+Nnu-1,mu0+Nmu-1)=arma::trans(Jsub);
262 }
263 
digest_K_incore(const arma::mat & C,const arma::vec & occs,arma::mat & K) const264 void DensityFit::digest_K_incore(const arma::mat & C, const arma::vec & occs, arma::mat & K) const {
265   if(C.n_rows != Nbf) {
266     std::ostringstream oss;
267     oss << "Error in DensityFit: Nbf = " << Nbf << ", C.n_rows = " << C.n_rows << "!\n";
268     throw std::logic_error(oss.str());
269   }
270 
271   // a_munu contains (a,uv). We want to build the exchange
272   // K_uv = \sum_i n_i (ui|vi) = \sum_i n_i (a|ui) (a|b)^-1 (b|vi)
273   for(size_t io=0;io<C.n_cols;io++) {
274     // Helper array
275     arma::mat aui(Naux,Nbf);
276     aui.zeros();
277 
278     // Fill integrals
279 #ifdef _OPENMP
280 #pragma omp parallel
281 #endif
282     {
283 #ifdef _OPENMP
284       arma::mat hlp(aui);
285 #pragma omp for schedule(dynamic)
286 #endif
287       for(size_t ip=0;ip<orbpairs.size();ip++) {
288 	// Shells in question are
289 	size_t imus=orbpairs[ip].is;
290 	size_t inus=orbpairs[ip].js;
291 	// First function on shell
292 	size_t mu0=orbshells[imus].get_first_ind();
293 	size_t nu0=orbshells[inus].get_first_ind();
294 	// Amount of functions
295 	size_t Nmu=orbshells[imus].get_Nbf();
296 	size_t Nnu=orbshells[inus].get_Nbf();
297 
298 	// The array amunu is stored in the form amunu(a,nu*Nmu+mu) =
299 	// amunu[(nu*Nmu+mu)*Naux+a], so we can reshape it to a
300 	// (Naux*Nmu,Nnu) matrix. Half-transformed (a|u;i) is then
301 	arma::mat ui(arma::reshape(a_munu[ip],Naux*Nmu,Nnu)*C.submat(nu0,io,nu0+Nnu-1,io));
302 	ui.reshape(Naux,Nmu);
303 
304 #ifdef _OPENMP
305 	hlp.cols(mu0,mu0+Nmu-1)+=ui;
306 #else
307 	aui.cols(mu0,mu0+Nmu-1)+=ui;
308 #endif
309 
310 	if(imus != inus) {
311 	  // Get (a|vu)
312 	  arma::mat anumu(Naux,Nmu*Nnu);
313 	  anumu.zeros();
314 	  for(size_t mu=0;mu<Nmu;mu++)
315 	    for(size_t nu=0;nu<Nnu;nu++)
316 	      anumu.col(mu*Nnu+nu)=a_munu[ip].col(nu*Nmu+mu);
317 
318 	  // Half-transformed (a|v;i) is
319 	  arma::mat vi(arma::reshape(anumu,Naux*Nnu,Nmu)*C.submat(mu0,io,mu0+Nmu-1,io));
320 	  vi.reshape(Naux,Nnu);
321 
322 #ifdef _OPENMP
323 	  hlp.cols(nu0,nu0+Nnu-1)+=vi;
324 #else
325 	  aui.cols(nu0,nu0+Nnu-1)+=vi;
326 #endif
327 	}
328       }
329 #ifdef _OPENMP
330 #pragma omp critical
331       aui+=hlp;
332 #endif
333     }
334 
335     // K_uv = (ui|vi) = (a|ui) (a|b)^-1 (b|vi)
336     if(Bmat) {
337       aui = ab_invh*aui;
338       K += occs[io]*arma::trans(aui)*aui;
339     } else {
340       K += occs[io]*arma::trans(aui)*ab_inv*aui;
341     }
342   }
343 }
344 
digest_K_incore(const arma::cx_mat & C,const arma::vec & occs,arma::cx_mat & K) const345 void DensityFit::digest_K_incore(const arma::cx_mat & C, const arma::vec & occs, arma::cx_mat & K) const {
346   if(C.n_rows != Nbf) {
347     std::ostringstream oss;
348     oss << "Error in DensityFit: Nbf = " << Nbf << ", C.n_rows = " << C.n_rows << "!\n";
349     throw std::logic_error(oss.str());
350   }
351 
352   // a_munu contains (a,uv). We want to build the exchange
353   // K_uv = \sum_i n_i (ui|vi) = \sum_i n_i (a|ui) (a|b)^-1 (b|vi)
354   for(size_t io=0;io<C.n_cols;io++) {
355     // Helper array
356     arma::cx_mat aui(Naux,Nbf);
357     aui.zeros();
358 
359     // Fill integrals
360 #ifdef _OPENMP
361 #pragma omp parallel
362 #endif
363     {
364 #ifdef _OPENMP
365       arma::cx_mat hlp(aui);
366 #pragma omp for schedule(dynamic)
367 #endif
368       for(size_t ip=0;ip<orbpairs.size();ip++) {
369 	// Shells in question are
370 	size_t imus=orbpairs[ip].is;
371 	size_t inus=orbpairs[ip].js;
372 	// First function on shell
373 	size_t mu0=orbshells[imus].get_first_ind();
374 	size_t nu0=orbshells[inus].get_first_ind();
375 	// Amount of functions
376 	size_t Nmu=orbshells[imus].get_Nbf();
377 	size_t Nnu=orbshells[inus].get_Nbf();
378 
379 	// The array amunu is stored in the form amunu(a,nu*Nmu+mu) =
380 	// amunu[(nu*Nmu+mu)*Naux+a], so we can reshape it to a
381 	// (Naux*Nmu,Nnu) matrix. Half-transformed (a|u;i) is then
382 	arma::cx_mat ui(arma::reshape(a_munu[ip],Naux*Nmu,Nnu)*C.submat(nu0,io,nu0+Nnu-1,io));
383 	ui.reshape(Naux,Nmu);
384 
385 #ifdef _OPENMP
386 	hlp.cols(mu0,mu0+Nmu-1)+=ui;
387 #else
388 	aui.cols(mu0,mu0+Nmu-1)+=ui;
389 #endif
390 
391 	if(imus != inus) {
392 	  // Get (a|vu)
393 	  arma::mat anumu(Naux,Nmu*Nnu);
394 	  anumu.zeros();
395 	  for(size_t mu=0;mu<Nmu;mu++)
396 	    for(size_t nu=0;nu<Nnu;nu++)
397 	      anumu.col(mu*Nnu+nu)=a_munu[ip].col(nu*Nmu+mu);
398 
399 	  // Half-transformed (a|v;i) is
400 	  arma::cx_mat vi(arma::reshape(anumu,Naux*Nnu,Nmu)*C.submat(mu0,io,mu0+Nmu-1,io));
401 	  vi.reshape(Naux,Nnu);
402 
403 #ifdef _OPENMP
404 	  hlp.cols(nu0,nu0+Nnu-1)+=vi;
405 #else
406 	  aui.cols(nu0,nu0+Nnu-1)+=vi;
407 #endif
408 	}
409       }
410 #ifdef _OPENMP
411 #pragma omp critical
412       aui+=hlp;
413 #endif
414     }
415 
416     // K_uv = (ui|vi) = (a|ui) (a|b)^-1 (b|vi)
417     if(Bmat) {
418       aui = ab_invh*aui;
419       K += occs[io]*arma::trans(aui)*aui;
420     } else {
421       K += occs[io]*arma::trans(aui)*ab_inv*aui;
422     }
423   }
424 }
425 
digest_K_direct(const arma::mat & C,const arma::vec & occs,arma::mat & K) const426 void DensityFit::digest_K_direct(const arma::mat & C, const arma::vec & occs, arma::mat & K) const {
427   if(C.n_rows != Nbf) {
428     std::ostringstream oss;
429     oss << "Error in DensityFit: Nbf = " << Nbf << ", C.n_rows = " << C.n_rows << "!\n";
430     throw std::logic_error(oss.str());
431   }
432 
433   // a_munu contains (a,uv). We want to build the exchange
434   // K_uv = \sum_i n_i (ui|vi) = \sum_i n_i (a|ui) (a|b)^-1 (b|vi)
435 
436   // Stack of helper matrices
437   std::vector<arma::mat> aui(C.n_cols);
438   for(size_t i=0;i<aui.size();i++)
439     aui[i].zeros(Naux,Nbf);
440 
441 #ifdef _OPENMP
442 #pragma omp parallel
443 #endif
444   {
445     // Thread-private helper
446 #ifdef _OPENMP
447     std::vector<arma::mat> auithr(aui);
448 #endif
449 
450     ERIWorker *eri;
451     if(omega==0.0 && alpha==1.0 && beta==0.0)
452       eri=new ERIWorker(maxam,maxcontr);
453     else
454       eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
455 
456 #ifdef _OPENMP
457 #pragma omp for schedule(dynamic)
458 #endif
459     for(size_t ip=0;ip<orbpairs.size();ip++) {
460       // Calculate integrals
461       arma::mat amunu(compute_a_munu(eri,ip));
462       // Shells in question are
463       size_t imus=orbpairs[ip].is;
464       size_t inus=orbpairs[ip].js;
465       // First function on shell
466       size_t mu0=orbshells[imus].get_first_ind();
467       size_t nu0=orbshells[inus].get_first_ind();
468       // Amount of functions
469       size_t Nmu=orbshells[imus].get_Nbf();
470       size_t Nnu=orbshells[inus].get_Nbf();
471 
472       // Get (a|vu)
473       arma::mat anumu;
474       if(imus!=inus) {
475 	anumu.zeros(Naux,Nmu*Nnu);
476 	for(size_t mu=0;mu<Nmu;mu++)
477 	  for(size_t nu=0;nu<Nnu;nu++)
478 	    anumu.col(mu*Nnu+nu)=amunu.col(nu*Nmu+mu);
479       }
480 
481       // Loop over orbitals
482       for(size_t io=0;io<C.n_cols;io++) {
483 	// The array amunu is stored in the form amunu(a,nu*Nmu+mu) =
484 	// amunu[(nu*Nmu+mu)*Naux+a], so we can reshape it to a
485 	// (Naux*Nmu,Nnu) matrix. Half-transformed (a|u;i) is then
486 	arma::mat ui(arma::reshape(amunu,Naux*Nmu,Nnu)*C.submat(nu0,io,nu0+Nnu-1,io));
487 	ui.reshape(Naux,Nmu);
488 
489 #ifdef _OPENMP
490 	auithr[io].cols(mu0,mu0+Nmu-1)+=ui;
491 #else
492 	aui[io].cols(mu0,mu0+Nmu-1)+=ui;
493 #endif
494 
495 	if(imus != inus) {
496 	  // Half-transformed (a|v;i) is
497 	  arma::mat vi(arma::reshape(anumu,Naux*Nnu,Nmu)*C.submat(mu0,io,mu0+Nmu-1,io));
498 	  vi.reshape(Naux,Nnu);
499 
500 #ifdef _OPENMP
501 	  auithr[io].cols(nu0,nu0+Nnu-1)+=vi;
502 #else
503 	  aui[io].cols(nu0,nu0+Nnu-1)+=vi;
504 #endif
505 	}
506       }
507     }
508 
509 #ifdef _OPENMP
510 #pragma omp critical
511     for(size_t io=0;io<C.n_cols;io++)
512       aui[io]+=auithr[io];
513 #endif
514   }
515 
516   // K_uv = (ui|vi) = (a|ui) (a|b)^-1 (b|vi)
517   if(Bmat) {
518     for(size_t io=0;io<C.n_cols;io++) {
519       aui[io] = ab_invh*aui[io];
520       K += occs[io]*arma::trans(aui[io])*aui[io];
521     }
522   } else {
523     for(size_t io=0;io<C.n_cols;io++) {
524       K += occs[io]*arma::trans(aui[io])*ab_inv*aui[io];
525     }
526   }
527 }
528 
memory_estimate(const BasisSet & orbbas,const BasisSet & auxbas,double thr,bool dir) const529 size_t DensityFit::memory_estimate(const BasisSet & orbbas, const BasisSet & auxbas, double thr, bool dir) const {
530   // Amount of auxiliary functions (for representing the electron density)
531   size_t Na=auxbas.get_Nbf();
532   // Amount of memory required for calculation
533   size_t Nmem=0;
534 
535   // Memory taken up by  ( \alpha | \mu \nu)
536   if(!dir) {
537     // Form screening matrix
538     arma::mat Q, M;
539     std::vector<eripair_t> opairs=orbbas.get_eripairs(Q,M,thr);
540 
541     // Count number of function pairs
542     size_t np=0;
543     for(size_t ip=0;ip<opairs.size();ip++)
544       np+=orbbas.get_Nbf(opairs[ip].is)*orbbas.get_Nbf(opairs[ip].js);
545     Nmem+=Na*np*sizeof(double);
546   }
547 
548   // Memory taken by (\alpha | \beta) and its inverse
549   Nmem+=2*Na*Na*sizeof(double);
550   if(Bmat)
551     // We also have (a|b)^(-1/2)
552     Nmem+=Na*Na*sizeof(double);
553 
554   // Memory taken by gamma and expansion coefficients
555   Nmem+=2*Na*sizeof(double);
556 
557   return Nmem;
558 }
559 
compute_expansion(const arma::mat & P) const560 arma::vec DensityFit::compute_expansion(const arma::mat & P) const {
561   if(P.n_rows != Nbf || P.n_cols != Nbf) {
562     std::ostringstream oss;
563     oss << "Error in DensityFit: Nbf = " << Nbf << ", P.n_rows = " << P.n_rows << ", P.n_cols = " << P.n_cols << "!\n";
564     throw std::logic_error(oss.str());
565   }
566 
567   arma::vec gamma(Naux);
568   gamma.zeros();
569 
570   // Compute gamma
571   if(!direct) {
572 #ifdef _OPENMP
573 #pragma omp parallel
574     {
575       // Thread-private helper
576       arma::vec gv(Naux);
577       gv.zeros();
578 #pragma omp for schedule(dynamic)
579       for(size_t ip=0;ip<orbpairs.size();ip++) {
580 	digest_Jexp(P,ip,a_munu[ip],gv);
581       }
582 #pragma omp critical
583       gamma+=gv;
584     }
585 #else
586     // Sequential code
587     for(size_t ip=0;ip<orbpairs.size();ip++)
588       digest_Jexp(P,ip,a_munu[ip],gamma);
589 #endif
590 
591   } else {
592 #ifdef _OPENMP
593 #pragma omp parallel
594     {
595       ERIWorker *eri;
596       if(omega==0.0 && alpha==1.0 && beta==0.0)
597 	eri=new ERIWorker(maxam,maxcontr);
598       else
599 	eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
600 
601       arma::vec gv(gamma);
602 #pragma omp for schedule(dynamic)
603       for(size_t ip=0;ip<orbpairs.size();ip++)
604 	digest_Jexp(P,ip,compute_a_munu(eri,ip),gv);
605 #pragma omp critical
606       gamma+=gv;
607 
608       delete eri;
609     }
610 #else
611     // Sequential code
612 
613     ERIWorker *eri;
614     if(omega==0.0 && alpha==1.0 && beta==0.0)
615       eri=new ERIWorker(maxam,maxcontr);
616     else
617       eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
618 
619     for(size_t ip=0;ip<orbpairs.size();ip++)
620       digest_Jexp(P,ip,compute_a_munu(eri,ip),gamma);
621 
622     delete eri;
623 #endif
624   }
625 
626   // Compute and return c
627   if(Bmat) {
628     return ab_inv*gamma;
629   } else {
630     // Compute x0
631     arma::vec x0=ab_inv*gamma;
632     // Compute and return c
633     return x0+ab_inv*(gamma-ab*x0);
634   }
635 }
636 
compute_expansion(const std::vector<arma::mat> & P) const637 std::vector<arma::vec> DensityFit::compute_expansion(const std::vector<arma::mat> & P) const {
638   for(size_t i=0;i<P.size();i++) {
639     if(P[i].n_rows != Nbf || P[i].n_cols != Nbf) {
640       std::ostringstream oss;
641       oss << "Error in DensityFit: Nbf = " << Nbf << ", P[" << i << "].n_rows = " << P[i].n_rows << ", P[" << i << "].n_cols = " << P[i].n_cols << "!\n";
642       throw std::logic_error(oss.str());
643     }
644   }
645 
646   std::vector<arma::vec> gamma(P.size());
647   for(size_t i=0;i<P.size();i++)
648     gamma[i].zeros(Naux);
649 
650   // Compute gamma
651   if(!direct) {
652     for(size_t iden=0;iden<P.size();iden++) {
653 #ifdef _OPENMP
654 #pragma omp parallel
655 #endif
656       {
657 	// Thread-private helper
658 #ifdef _OPENMP
659 	arma::vec gv(Naux);
660 	gv.zeros();
661 #pragma omp for schedule(dynamic)
662 #endif
663 	for(size_t ip=0;ip<orbpairs.size();ip++) {
664 #ifdef _OPENMP
665 	  digest_Jexp(P[iden],ip,a_munu[ip],gv);
666 #else
667 	  digest_Jexp(P[iden],ip,a_munu[ip],gamma[iden]);
668 #endif
669 	}
670 #ifdef _OPENMP
671 #pragma omp critical
672 	gamma[iden]+=gv;
673 #endif
674       }
675     }
676 
677   } else {
678     for(size_t iden=0;iden<P.size();iden++) {
679 #ifdef _OPENMP
680 #pragma omp parallel
681 #endif
682       {
683 	ERIWorker *eri;
684 	if(omega==0.0 && alpha==1.0 && beta==0.0)
685 	  eri=new ERIWorker(maxam,maxcontr);
686 	else
687 	  eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
688 
689 #ifdef _OPENMP
690 	// Worker stack for each matrix
691 	arma::vec gv(gamma[iden]);
692 #pragma omp for schedule(dynamic)
693 #endif
694 	for(size_t ip=0;ip<orbpairs.size();ip++) {
695 #ifdef _OPENMP
696 	  digest_Jexp(P[iden],ip,compute_a_munu(eri,ip),gv);
697 #else
698 	  digest_Jexp(P[iden],ip,compute_a_munu(eri,ip),gamma[iden]);
699 #endif
700 	}
701 #ifdef _OPENMP
702 #pragma omp critical
703 	gamma[iden]+=gv;
704 #endif
705 
706 	delete eri;
707       }
708     }
709   }
710 
711   // Compute and return c
712   if(Bmat) {
713     for(size_t ig=0;ig<P.size();ig++)
714       gamma[ig]=ab_inv*gamma[ig];
715 
716   } else {
717     for(size_t ig=0;ig<P.size();ig++) {
718       // Compute x0
719       arma::vec x0=ab_inv*gamma[ig];
720       // Compute and return c
721       gamma[ig]=x0+ab_inv*(gamma[ig]-ab*x0);
722     }
723   }
724 
725   return gamma;
726 }
727 
calcJ(const arma::mat & P) const728 arma::mat DensityFit::calcJ(const arma::mat & P) const {
729   if(P.n_rows != Nbf || P.n_cols != Nbf) {
730     std::ostringstream oss;
731     oss << "Error in DensityFit: Nbf = " << Nbf << ", P.n_rows = " << P.n_rows << ", P.n_cols = " << P.n_cols << "!\n";
732     throw std::logic_error(oss.str());
733   }
734 
735   // Get the expansion coefficients
736   arma::vec c=compute_expansion(P);
737 
738   arma::mat J(Nbf,Nbf);
739   J.zeros();
740 
741   if(!direct) {
742 #ifdef _OPENMP
743 #pragma omp parallel for schedule(dynamic)
744 #endif
745     for(size_t ip=0;ip<orbpairs.size();ip++)
746       digest_J(c,ip,a_munu[ip],J);
747 
748   } else {
749 #ifdef _OPENMP
750 #pragma omp parallel
751 #endif
752     {
753       ERIWorker *eri;
754       if(omega==0.0 && alpha==1.0 && beta==0.0)
755 	eri=new ERIWorker(maxam,maxcontr);
756       else
757 	eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
758 
759 #ifdef _OPENMP
760 #pragma omp for
761 #endif
762       for(size_t ip=0;ip<orbpairs.size();ip++)
763 	digest_J(c,ip,compute_a_munu(eri,ip),J);
764 
765       delete eri;
766     }
767   }
768 
769   return J;
770 }
771 
calcJ(const std::vector<arma::mat> & P) const772 std::vector<arma::mat> DensityFit::calcJ(const std::vector<arma::mat> & P) const {
773   // Get the expansion coefficients
774   std::vector<arma::vec> c=compute_expansion(P);
775 
776   std::vector<arma::mat> J(P.size());
777   for(size_t iden=0;iden<P.size();iden++)
778     J[iden].zeros(Nbf,Nbf);
779 
780   if(!direct) {
781 #ifdef _OPENMP
782 #pragma omp parallel for schedule(dynamic)
783 #endif
784     for(size_t ip=0;ip<orbpairs.size();ip++)
785       for(size_t iden=0;iden<P.size();iden++)
786 	digest_J(c[iden],ip,a_munu[ip],J[iden]);
787 
788   } else {
789 #ifdef _OPENMP
790 #pragma omp parallel
791 #endif
792     {
793       ERIWorker *eri;
794       if(omega==0.0 && alpha==1.0 && beta==0.0)
795 	eri=new ERIWorker(maxam,maxcontr);
796       else
797 	eri=new ERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
798 
799 #ifdef _OPENMP
800 #pragma omp for schedule(dynamic)
801 #endif
802       for(size_t ip=0;ip<orbpairs.size();ip++) {
803 	arma::mat amunu(compute_a_munu(eri,ip));
804 	for(size_t iden=0;iden<P.size();iden++)
805 	  digest_J(c[iden],ip,amunu,J[iden]);
806       }
807 
808       delete eri;
809     }
810   }
811 
812   return J;
813 }
814 
forceJ(const arma::mat & P)815 arma::vec DensityFit::forceJ(const arma::mat & P) {
816   // First, compute the expansion
817   arma::vec c=compute_expansion(P);
818 
819   // The force
820   arma::vec f(3*Nnuc);
821   f.zeros();
822 
823   // First part: f = *#* 1/2 c_a (a|b)' c_b *#* - gamma_a' c_a
824 #ifdef _OPENMP
825 #pragma omp parallel
826 #endif
827   {
828     dERIWorker *deri;
829     if(omega==0.0 && alpha==1.0 && beta==0.0)
830       deri=new dERIWorker(maxam,maxcontr);
831     else
832       deri=new dERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
833     const std::vector<double> * erip;
834 
835 #ifdef _OPENMP
836     // Worker stack for each matrix
837     arma::vec fwrk(f);
838     fwrk.zeros();
839 
840 #pragma omp for schedule(dynamic)
841 #endif
842     for(size_t ias=0;ias<auxshells.size();ias++)
843       for(size_t jas=0;jas<=ias;jas++) {
844 
845 	// Symmetry factor
846 	double fac=0.5;
847 	if(ias!=jas)
848 	  fac*=2.0;
849 
850 	size_t Na=auxshells[ias].get_Nbf();
851 	size_t anuc=auxshells[ias].get_center_ind();
852 	size_t Nb=auxshells[jas].get_Nbf();
853 	size_t bnuc=auxshells[jas].get_center_ind();
854 
855 	if(anuc==bnuc)
856 	  // Contributions vanish
857 	  continue;
858 
859 	// Compute (a|b)
860 	deri->compute(&auxshells[ias],&dummy,&auxshells[jas],&dummy);
861 
862 	// Compute forces
863 	const static int index[]={0, 1, 2, 6, 7, 8};
864 	const size_t Nidx=sizeof(index)/sizeof(index[0]);
865 	double ders[Nidx];
866 
867 	for(size_t iid=0;iid<Nidx;iid++) {
868 	  ders[iid]=0.0;
869 	  // Index is
870 	  int ic=index[iid];
871 
872 	  // Increment force, anuc
873 	  erip=deri->getp(ic);
874 	  for(size_t iia=0;iia<Na;iia++) {
875 	    size_t ia=auxshells[ias].get_first_ind()+iia;
876 
877 	    for(size_t iib=0;iib<Nb;iib++) {
878 	      size_t ib=auxshells[jas].get_first_ind()+iib;
879 
880 	      // The integral is
881 	      double res=(*erip)[iia*Nb+iib];
882 
883 	      ders[iid]+= res*c(ia)*c(ib);
884 	    }
885 	  }
886 	  ders[iid]*=fac;
887 	}
888 
889 	// Increment forces
890 	for(int ic=0;ic<3;ic++) {
891 #ifdef _OPENMP
892 	  fwrk(3*anuc+ic)+=ders[ic];
893 	  fwrk(3*bnuc+ic)+=ders[ic+3];
894 #else
895 	  f(3*anuc+ic)+=ders[ic];
896 	  f(3*bnuc+ic)+=ders[ic+3];
897 #endif
898 	}
899       }
900 
901 #ifdef _OPENMP
902 #pragma omp critical
903     // Sum results together
904     f+=fwrk;
905 #endif
906     delete deri;
907   } // end parallel section
908 
909 
910     // Second part: f = 1/2 c_a (a|b)' c_b *#* - gamma_a' c_a *#*
911 #ifdef _OPENMP
912 #pragma omp parallel
913 #endif
914   {
915     dERIWorker *deri;
916     if(omega==0.0 && alpha==1.0 && beta==0.0)
917       deri=new dERIWorker(maxam,maxcontr);
918     else
919       deri=new dERIWorker_srlr(maxam,maxcontr,omega,alpha,beta);
920     const std::vector<double> * erip;
921 
922 #ifdef _OPENMP
923     // Worker stack for each matrix
924     arma::vec fwrk(f);
925     fwrk.zeros();
926 
927 #pragma omp for schedule(dynamic)
928 #endif
929     for(size_t ip=0;ip<orbpairs.size();ip++) {
930       size_t imus=orbpairs[ip].is;
931       size_t inus=orbpairs[ip].js;
932 
933       size_t Nmu=orbshells[imus].get_Nbf();
934       size_t Nnu=orbshells[inus].get_Nbf();
935 
936       size_t inuc=orbshells[imus].get_center_ind();
937       size_t jnuc=orbshells[inus].get_center_ind();
938 
939       // If imus==inus, we need to take care that we count
940       // every term only once; on the off-diagonal we get
941       // every term twice.
942       double fac=2.0;
943       if(imus==inus)
944 	fac=1.0;
945 
946       for(size_t ias=0;ias<auxshells.size();ias++) {
947 	size_t Na=auxshells[ias].get_Nbf();
948 	size_t anuc=auxshells[ias].get_center_ind();
949 
950 	if(inuc==jnuc && jnuc==anuc)
951 	  // Contributions vanish
952 	  continue;
953 
954 	// Compute (a|mn)
955 	deri->compute(&auxshells[ias],&dummy,&orbshells[imus],&orbshells[inus]);
956 
957 	// Expansion coefficients
958 	arma::vec ca=c.subvec(auxshells[ias].get_first_ind(),auxshells[ias].get_last_ind());
959 
960 	// Compute forces
961 	const static int index[]={0, 1, 2, 6, 7, 8, 9, 10, 11};
962 	const size_t Nidx=sizeof(index)/sizeof(index[0]);
963 	double ders[Nidx];
964 
965 	for(size_t iid=0;iid<Nidx;iid++) {
966 	  // Index is
967 	  int ic=index[iid];
968 	  arma::vec hlp(Na);
969 
970 	  erip=deri->getp(ic);
971 	  hlp.zeros();
972 	  for(size_t iia=0;iia<Na;iia++)
973 	    for(size_t iimu=0;iimu<Nmu;iimu++) {
974 	      size_t imu=orbshells[imus].get_first_ind()+iimu;
975 	      for(size_t iinu=0;iinu<Nnu;iinu++) {
976 		size_t inu=orbshells[inus].get_first_ind()+iinu;
977 
978 		// The contracted integral
979 		hlp(iia)+=(*erip)[(iia*Nmu+iimu)*Nnu+iinu]*P(imu,inu);
980 	      }
981 	    }
982 	  ders[iid]=fac*arma::dot(hlp,ca);
983 	}
984 
985 	// Increment forces
986 	for(int ic=0;ic<3;ic++) {
987 #ifdef _OPENMP
988 	  fwrk(3*anuc+ic)-=ders[ic];
989 	  fwrk(3*inuc+ic)-=ders[ic+3];
990 	  fwrk(3*jnuc+ic)-=ders[ic+6];
991 #else
992 	  f(3*anuc+ic)-=ders[ic];
993 	  f(3*inuc+ic)-=ders[ic+3];
994 	  f(3*jnuc+ic)-=ders[ic+6];
995 #endif
996 	}
997 
998       }
999     }
1000 
1001 #ifdef _OPENMP
1002 #pragma omp critical
1003     // Sum results together
1004     f+=fwrk;
1005 #endif
1006     delete deri;
1007   } // end parallel section
1008 
1009   return f;
1010 }
1011 
calcK(const arma::mat & Corig,const std::vector<double> & occo,size_t fitmem) const1012 arma::mat DensityFit::calcK(const arma::mat & Corig, const std::vector<double> & occo, size_t fitmem) const {
1013   if(Corig.n_rows != Nbf) {
1014     std::ostringstream oss;
1015     oss << "Error in DensityFit: Nbf = " << Nbf << ", Corig.n_rows = " << Corig.n_rows << "!\n";
1016     throw std::logic_error(oss.str());
1017   }
1018 
1019   // Count number of orbitals
1020   size_t Nmo=0;
1021   for(size_t i=0;i<occo.size();i++)
1022     if(occo[i]>0)
1023       Nmo++;
1024 
1025   // Collect orbitals to use
1026   arma::mat C(Nbf,Nmo);
1027   arma::vec occs(Nmo);
1028   {
1029     size_t io=0;
1030     for(size_t i=0;i<occo.size();i++)
1031       if(occo[i]>0) {
1032 	// Store orbital and occupation number
1033 	C.col(io)=Corig.col(i);
1034 	occs[io]=occo[i];
1035 	io++;
1036       }
1037   }
1038 
1039   // Returned matrix
1040   arma::mat K(Nbf,Nbf);
1041   K.zeros();
1042 
1043   if(!direct) {
1044     digest_K_incore(C,occs,K);
1045   } else {
1046     // Determine how many orbitals we can handle at one go. Memory
1047     // needed for one orbital is
1048     size_t oneorb(sizeof(double)*Naux*Nbf);
1049 #ifdef _OPENMP
1050     // Each thread needs its own storage
1051     oneorb*=omp_get_max_threads();
1052 #endif
1053 
1054     // Number of orbitals that can be handled in one block is
1055     size_t blocksize(floor(fitmem*1.0/oneorb));
1056     if(blocksize<1) {
1057       std::ostringstream oss;
1058       oss << "Not enough fitting memory! Need at least " << memory_size(oneorb) << " per orbital!\n";
1059       throw std::logic_error(oss.str());
1060     }
1061     // Number of blocks is then
1062     size_t nblocks(ceil(C.n_cols*1.0/blocksize));
1063 
1064     //printf("Handling %i orbitals at once.\n",(int) blocksize);
1065 
1066     // Loop over blocks
1067     for(size_t iblock=0;iblock<nblocks;iblock++) {
1068       // Block starts at
1069       size_t iobeg(iblock*blocksize);
1070       size_t ioend(std::min((iblock+1)*blocksize-1,(size_t) C.n_cols-1));
1071 
1072       //printf("Treating orbitals %i-%i\n",(int) iobeg,(int) ioend);
1073       //fflush(stdout);
1074 
1075       digest_K_direct(C.cols(iobeg,ioend),occs.subvec(iobeg,ioend),K);
1076     }
1077   }
1078 
1079   return K;
1080 }
1081 
calcK(const arma::cx_mat & Corig,const std::vector<double> & occo,size_t fitmem) const1082 arma::cx_mat DensityFit::calcK(const arma::cx_mat & Corig, const std::vector<double> & occo, size_t fitmem) const {
1083   if(Corig.n_rows != Nbf) {
1084     std::ostringstream oss;
1085     oss << "Error in DensityFit: Nbf = " << Nbf << ", Corig.n_rows = " << Corig.n_rows << "!\n";
1086     throw std::logic_error(oss.str());
1087   }
1088 
1089   // Count number of orbitals
1090   size_t Nmo=0;
1091   for(size_t i=0;i<occo.size();i++)
1092     if(occo[i]>0)
1093       Nmo++;
1094 
1095   // Collect orbitals to use
1096   arma::cx_mat C(Nbf,Nmo);
1097   arma::vec occs(Nmo);
1098   {
1099     size_t io=0;
1100     for(size_t i=0;i<occo.size();i++)
1101       if(occo[i]>0) {
1102 	// Store orbital and occupation number
1103 	C.col(io)=Corig.col(i);
1104 	occs[io]=occo[i];
1105 	io++;
1106       }
1107   }
1108 
1109   // Returned matrix
1110   arma::cx_mat K(Nbf,Nbf);
1111   K.zeros();
1112 
1113   if(!direct) {
1114     digest_K_incore(C,occs,K);
1115   } else {
1116     (void) fitmem;
1117     throw std::logic_error("Direct mode hasn't been implemented for density-fitted complex exchange!\n");
1118   }
1119 
1120   return K;
1121 }
1122 
get_Norb() const1123 size_t DensityFit::get_Norb() const {
1124   return Nbf;
1125 }
1126 
get_Naux() const1127 size_t DensityFit::get_Naux() const {
1128   return Naux;
1129 }
1130 
get_ab_inv() const1131 arma::mat DensityFit::get_ab_inv() const {
1132   return ab_inv;
1133 }
1134 
B_matrix(arma::mat & B) const1135 void DensityFit::B_matrix(arma::mat & B) const {
1136   if(direct)
1137     throw std::runtime_error("Must run in tabulated mode!\n");
1138   if(!Bmat)
1139     throw std::runtime_error("Must be run in B-matrix mode!\n");
1140 
1141   // Collect AO integrals
1142   B.zeros(Nbf*Nbf,Naux);
1143   for(size_t ip=0;ip<orbpairs.size();ip++) {
1144     size_t imus=orbpairs[ip].is;
1145     size_t inus=orbpairs[ip].js;
1146     size_t Nmu=orbshells[imus].get_Nbf();
1147     size_t Nnu=orbshells[inus].get_Nbf();
1148     size_t mu0=orbshells[imus].get_first_ind();
1149     size_t nu0=orbshells[inus].get_first_ind();
1150 
1151     const arma::mat & amunu(a_munu[ip]);
1152 
1153     for(size_t ias=0;ias<auxshells.size();ias++) {
1154       size_t Na=auxshells[ias].get_Nbf();
1155       size_t a0=auxshells[ias].get_first_ind();
1156 
1157       for(size_t imu=0;imu<Nmu;imu++)
1158 	for(size_t inu=0;inu<Nnu;inu++)
1159 	  for(size_t ia=0;ia<Na;ia++) {
1160 	    size_t mu=imu+mu0;
1161 	    size_t nu=inu+nu0;
1162 	    size_t a=ia+a0;
1163 
1164 	    double el(amunu(ia+a0,nu*Nmu+mu));
1165 	    B(mu*Nbf+nu,a)=el;
1166 	    B(nu*Nbf+mu,a)=el;
1167 	  }
1168     }
1169   }
1170   // Transform into proper B matrix
1171   B*=ab_invh;
1172 }
1173