1 /*
2  *                This source code is part of
3  *
4  *                     E  R  K  A  L  E
5  *                             -
6  *                       DFT from Hel
7  *
8  * Written by Susi Lehtola, 2010-2011
9  * Copyright (c) 2010-2011, Susi Lehtola
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public License
13  * as published by the Free Software Foundation; either version 2
14  * of the License, or (at your option) any later version.
15  */
16 
17 #include <cfloat>
18 #include <cmath>
19 #include <cstdio>
20 // LibXC
21 #include <xc.h>
22 
23 #include "dftfuncs.h"
24 #include "dftgrid.h"
25 #include "elements.h"
26 #include "hirshfeld.h"
27 #include "mathf.h"
28 // Lobatto or Lebedev for angular
29 #include "lobatto.h"
30 #include "lebedev.h"
31 // Gauss-Chebyshev for radial integral
32 #include "chebyshev.h"
33 
34 #include "settings.h"
35 #include "stringutil.h"
36 #include "timer.h"
37 
38 // OpenMP parallellization for XC calculations
39 #ifdef _OPENMP
40 #include <omp.h>
41 #endif
42 
43 extern Settings settings;
44 
45 /* Partitioning functions */
f_p(double mu)46 inline double f_p(double mu) {
47   return 1.5*mu-0.5*mu*mu*mu;
48 }
49 
f_q(double mu,double a)50 inline double f_q(double mu, double a) {
51   if(mu<-a)
52     return -1.0;
53   else if(mu<a)
54     return f_p(mu/a);
55   else
56     return 1.0;
57 }
58 
f_s(double mu,double a)59 inline double f_s(double mu, double a) {
60   return 0.5*(1.0-f_p(f_p(f_q(mu,a))));
61 }
62 
operator <(const dens_list_t & lhs,const dens_list_t & rhs)63 bool operator<(const dens_list_t &lhs, const dens_list_t & rhs) {
64   // Sort in decreasing order
65   return lhs.d > rhs.d;
66 }
67 
adaptive_l0()68 static int adaptive_l0() {
69   return 3;
70 }
71 
krack_nrad(double stol,int Z)72 static int krack_nrad(double stol, int Z) {
73   return std::max(20,(int) round(-5*(3*log10(stol)+8-(1+element_row[Z]))));
74 }
75 
krack_lmax(double stol)76 static int krack_lmax(double stol) {
77   /*
78     The default parameters in the paper
79 
80     return (int) ceil(2.0-3.0*log10(stol));
81 
82     give a very poor grid for e.g. CCl4: an 1e-5 tolerance yields a
83     grid with -3.7 electrons error even in def2-SVP. As a quick hack,
84     the parameters are re-chosen as the average of the old ones and of
85     the ones from Koster which are meant for integrating the Fock
86     matrix.
87   */
88 
89   return (int) ceil(3.5-4.5*log10(stol));
90 }
91 
koster_nrad(double ftol,int Z)92 static int koster_nrad(double ftol, int Z) {
93   return std::max(20,(int) round(-5*(3*log10(ftol)+6-(1+element_row[Z]))));
94 }
95 
koster_lmax(double ftol)96 static int koster_lmax(double ftol) {
97   return (int) ceil(5.0-6.0*log10(ftol));
98 }
99 
AngularGrid(bool lobatto_)100 AngularGrid::AngularGrid(bool lobatto_) : use_lobatto(lobatto_) {
101   do_grad=false;
102   do_tau=false;
103   do_lapl=false;
104   do_hess=false;
105   do_lgrad=false;
106 }
107 
~AngularGrid()108 AngularGrid::~AngularGrid() {
109 }
110 
set_basis(const BasisSet & basis)111 void AngularGrid::set_basis(const BasisSet & basis) {
112   basp=&basis;
113 }
114 
set_grid(const angshell_t & sh)115 void AngularGrid::set_grid(const angshell_t & sh) {
116   info=sh;
117 }
118 
get_grid() const119 std::vector<gridpoint_t> AngularGrid::get_grid() const {
120   return grid;
121 }
122 
lobatto_shell()123 void AngularGrid::lobatto_shell() {
124   // Add points on ind:th radial shell.
125 
126   // Get parameters of shell
127   const double rad(info.R);
128   const int l(info.l);
129   const double wrad(info.w);
130 
131   // Number of points in theta
132   int nth=(l+3)/2;
133 
134   // Get corresponding Lobatto quadrature rule points in theta
135   std::vector<double> xl, wl;
136   lobatto_compute(nth,xl,wl);
137 
138   // Clear grid
139   grid.clear();
140 
141   // Number of points on this shell
142   size_t np=0;
143 
144   // Loop over points in theta
145   for(int ith=0;ith<nth;ith++) {
146     // Compute cos(th) and sin(th);
147 
148     double cth=xl[ith];
149     double sth=sqrt(1-cth*cth);
150 
151     // Determine number of points in phi, defined by smallest integer for which
152     // sin^{nphi} \theta < THR
153     double thr;
154     if(l<=50)
155       thr=1e-8;
156     else
157       thr=1e-9;
158 
159     // Calculate nphi
160     int nphi=1;
161     double t=sth;
162     while(t>=thr && nphi<=l+1) {
163       nphi++;
164       t*=sth;
165     }
166 
167     // Use an offset in phi?
168     double phioff=0.0;
169     if(ith%2)
170       phioff=M_PI/nphi;
171 
172     // Now, generate the points.
173     gridpoint_t point;
174     double phi, dphi; // Value of phi and the increment
175     double cph, sph; // Sine and cosine of phi
176 
177     dphi=2.0*M_PI/nphi;
178 
179     // Total weight of points on this ring is
180     // (disregarding weight from Becke partitioning)
181     point.w=2.0*M_PI*wl[ith]/nphi*wrad;
182 
183     for(int iphi=0;iphi<nphi;iphi++) {
184       // Value of phi is
185       phi=iphi*dphi+phioff;
186       // and the sine and cosine are
187       sph=sin(phi);
188       cph=cos(phi);
189 
190       // Compute x, y and z
191       point.r.x=rad*sth*cph;
192       point.r.y=rad*sth*sph;
193       point.r.z=rad*cth;
194 
195       // Displace to center
196       point.r=point.r+info.cen;
197 
198       // Add point
199       grid.push_back(point);
200       // Increment number of points
201       np++;
202     }
203   }
204 
205   // Store number of points on this shell
206   info.np=np;
207 }
208 
lebedev_shell()209 void AngularGrid::lebedev_shell() {
210   // Parameters of shell
211   const double rad(info.R);
212   const int l(info.l);
213   const double wrad(info.w);
214 
215   // Get quadrature rule
216   std::vector<lebedev_point_t> points=lebedev_sphere(l);
217 
218   // Number of points on this shell
219   size_t np=points.size();
220 
221   // Loop over points
222   for(size_t i=0;i<points.size();i++) {
223     gridpoint_t point;
224 
225     point.r.x=rad*points[i].x;
226     point.r.y=rad*points[i].y;
227     point.r.z=rad*points[i].z;
228     // Displace to center
229     point.r=point.r+info.cen;
230 
231     // Compute quadrature weight
232     // (Becke weight not included)
233     point.w=wrad*points[i].w;
234 
235     // Add point
236     grid.push_back(point);
237   }
238 
239   // Store number of points on this shell
240   info.np=np;
241 }
242 
becke_weights(double a)243 void AngularGrid::becke_weights(double a) {
244   // Compute weights of points.
245 
246   // Number of atoms in system
247   const size_t Nat=basp->get_Nnuc();
248 
249   // Helper arrays
250   arma::vec atom_dist;
251   arma::vec atom_weight;
252   arma::mat mu_ab;
253   arma::mat smu_ab;
254   // Indices to consider
255   std::vector<size_t> cmpidx;
256 
257   // Initialize memory
258   atom_dist.zeros(Nat);
259   atom_weight.zeros(Nat);
260   mu_ab.zeros(Nat,Nat);
261   smu_ab.zeros(Nat,Nat);
262 
263   // Get nuclei
264   std::vector<nucleus_t> nuccoords=basp->get_nuclei();
265 
266   // Get nuclear distances
267   arma::mat nucdist=basp->nuclear_distances();
268   // Compute closest distance to other atoms
269   double Rin=DBL_MAX;
270   for(size_t i=0;i<info.atind;i++)
271     if(nucdist(info.atind,i)<Rin)
272       Rin=nucdist(info.atind,i);
273   for(size_t i=info.atind+1;i<Nat;i++)
274     if(nucdist(info.atind,i)<Rin)
275       Rin=nucdist(info.atind,i);
276 
277   double scrthr=std::pow(0.5*(1-a)*Rin,2);
278 
279   // Loop over grid
280   for(size_t ip=0;ip<grid.size();ip++) {
281     // Coordinates of the point are
282     coords_t coord_p=grid[ip].r;
283 
284     // Prescreen - is the weight unity?
285     if(normsq(nuccoords[info.atind].r-coord_p) < scrthr)
286       // Yes - nothing to do.
287       continue;
288 
289     // Compute distance of point to atoms
290     for(size_t iat=0;iat<Nat;iat++)
291       atom_dist(iat)=norm(nuccoords[iat].r-coord_p);
292 
293     // Compute mu_ab
294     for(size_t iat=0;iat<Nat;iat++) {
295       // Diagonal
296       mu_ab(iat,iat)=0.0;
297       // Off-diagonal
298       for(size_t jat=0;jat<iat;jat++) {
299 	mu_ab(iat,jat)=(atom_dist(iat)-atom_dist(jat))/nucdist(iat,jat);
300 	mu_ab(jat,iat)=-mu_ab(iat,jat);
301       }
302     }
303 
304     // Compute s(mu_ab)
305     for(size_t iat=0;iat<Nat;iat++)
306       for(size_t jat=0;jat<Nat;jat++) {
307 	smu_ab(iat,jat)=f_s(mu_ab(iat,jat),a);
308       }
309 
310     // Then, compute atomic weights
311     for(size_t iat=0;iat<Nat;iat++) {
312       atom_weight(iat)=1.0;
313 
314       for(size_t jat=0;jat<iat;jat++)
315 	atom_weight(iat)*=smu_ab(iat,jat);
316       for(size_t jat=iat+1;jat<Nat;jat++)
317 	atom_weight(iat)*=smu_ab(iat,jat);
318     }
319 
320     // The Becke weight is
321     grid[ip].w*=atom_weight(info.atind)/arma::sum(atom_weight);
322   }
323 }
324 
hirshfeld_weights(const Hirshfeld & hirsh)325 void AngularGrid::hirshfeld_weights(const Hirshfeld & hirsh) {
326   // Compute weights of points.
327   for(size_t ip=0;ip<grid.size();ip++)
328     // The Hirshfeld weight is
329     grid[ip].w*=hirsh.get_weight(info.atind,grid[ip].r);
330 }
331 
prune_points()332 void AngularGrid::prune_points() {
333   // Prune points with small weight.
334   for(size_t i=grid.size()-1;i<grid.size();i--)
335     if(grid[i].w<=info.tol)
336       grid.erase(grid.begin()+i);
337 
338   // Update amont of points
339   info.np=grid.size();
340 }
341 
free()342 void AngularGrid::free() {
343   // Free integration points
344   grid.clear();
345   w.clear();
346 
347   // Free basis lists
348   pot_shells.clear();
349   pot_bf_ind.clear();
350   shells.clear();
351   bf_ind.clear();
352   bf_potind.clear();
353 
354   // Free values of basis functions
355   bf.clear();
356   bf_x.clear();
357   bf_y.clear();
358   bf_z.clear();
359   bf_lapl.clear();
360 
361   // Free density stuff
362   Pv.clear();
363   Pv_x.clear();
364   Pv_y.clear();
365   Pv_z.clear();
366   Pav.clear();
367   Pav_x.clear();
368   Pav_y.clear();
369   Pav_z.clear();
370   Pbv.clear();
371   Pbv_x.clear();
372   Pbv_y.clear();
373   Pbv_z.clear();
374 
375   // Free LDA stuff
376   rho.clear();
377   exc.clear();
378   vxc.clear();
379 
380   // Free GGA stuff
381   grho.clear();
382   sigma.clear();
383   vsigma.clear();
384 
385   // Free mGGA stuff
386   lapl.clear();
387   vlapl.clear();
388   tau.clear();
389   vtau.clear();
390 
391   // Free VV10 stuff
392   VV10_arr.clear();
393 }
394 
screen_density(double thr) const395 arma::uvec AngularGrid::screen_density(double thr) const {
396   // List of points
397   std::vector<size_t> idx;
398   // Loop over grid
399   if(!polarized) {
400     if(rho.size() != grid.size()) throw std::logic_error("Wrong size density!\n");
401     for(size_t i=0;i<grid.size();i++)
402       if(rho(i)>=thr)
403 	idx.push_back(i);
404   } else {
405     if(rho.size() != 2*grid.size()) throw std::logic_error("Wrong size density!\n");
406     for(size_t i=0;i<grid.size();i++)
407       if((rho(2*i)+rho(2*i+1))>=thr)
408 	idx.push_back(i);
409   }
410 
411   return arma::conv_to<arma::uvec>::from(idx);
412 }
413 
414 
update_density(const arma::mat & P0)415 void AngularGrid::update_density(const arma::mat & P0) {
416   // Update values of densitty
417 
418   if(!P0.n_elem) {
419     ERROR_INFO();
420     throw std::runtime_error("Error - density matrix is empty!\n");
421   }
422 
423   // Non-polarized calculation.
424   polarized=false;
425 
426   // Update density vector
427   arma::mat P(P0.submat(bf_ind,bf_ind));
428   Pv=P*bf;
429 
430   // Calculate density
431   rho.zeros(1,grid.size());
432   for(size_t ip=0;ip<grid.size();ip++)
433     rho(0,ip)=arma::dot(Pv.col(ip),bf.col(ip));
434 
435   // Calculate gradient
436   if(do_grad) {
437     grho.zeros(3,grid.size());
438     sigma.zeros(1,grid.size());
439     for(size_t ip=0;ip<grid.size();ip++) {
440       // Calculate values
441       double gx=grho(0,ip)=2.0*arma::dot(Pv.col(ip),bf_x.col(ip));
442       double gy=grho(1,ip)=2.0*arma::dot(Pv.col(ip),bf_y.col(ip));
443       double gz=grho(2,ip)=2.0*arma::dot(Pv.col(ip),bf_z.col(ip));
444       // Compute sigma as well
445       sigma(0,ip)=gx*gx + gy*gy + gz*gz;
446     }
447   }
448 
449   // Calculate laplacian and kinetic energy density
450   if(do_tau && do_lapl) {
451     // Adjust size of grid
452     lapl.zeros(1,grid.size());
453     tau.zeros(1,grid.size());
454 
455     // Update helpers
456     Pv_x=P*bf_x;
457     Pv_y=P*bf_y;
458     Pv_z=P*bf_z;
459 
460     // Calculate values
461     for(size_t ip=0;ip<grid.size();ip++) {
462       // Laplacian term
463       double lap=arma::dot(Pv.col(ip),bf_lapl.col(ip));
464       // Gradient term
465       double gradx(arma::dot(Pv_x.col(ip),bf_x.col(ip)));
466       double grady(arma::dot(Pv_y.col(ip),bf_y.col(ip)));
467       double gradz(arma::dot(Pv_z.col(ip),bf_z.col(ip)));
468       double grad(gradx+grady+gradz);
469 
470       // Store values
471       lapl(0,ip)=2.0*(lap+grad);
472       tau(0,ip)=0.5*grad;
473     }
474   } else if(do_tau) {
475     // Adjust size of grid
476     tau.zeros(1,grid.size());
477 
478     // Update helpers
479     Pv_x=P*bf_x;
480     Pv_y=P*bf_y;
481     Pv_z=P*bf_z;
482 
483     // Calculate values
484     for(size_t ip=0;ip<grid.size();ip++) {
485       // Gradient term
486       double gradx(arma::dot(Pv_x.col(ip),bf_x.col(ip)));
487       double grady(arma::dot(Pv_y.col(ip),bf_y.col(ip)));
488       double gradz(arma::dot(Pv_z.col(ip),bf_z.col(ip)));
489       double grad(gradx+grady+gradz);
490 
491       // Store values
492       tau(0,ip)=0.5*grad;
493     }
494   } else if(do_lapl) {
495     // Adjust size of grid
496     lapl.zeros(1,grid.size());
497 
498     // Update helpers
499     Pv_x=P*bf_x;
500     Pv_y=P*bf_y;
501     Pv_z=P*bf_z;
502 
503     // Calculate values
504     for(size_t ip=0;ip<grid.size();ip++) {
505       // Laplacian term
506       double lap=arma::dot(Pv.col(ip),bf_lapl.col(ip));
507       // Gradient term
508       double gradx(arma::dot(Pv_x.col(ip),bf_x.col(ip)));
509       double grady(arma::dot(Pv_y.col(ip),bf_y.col(ip)));
510       double gradz(arma::dot(Pv_z.col(ip),bf_z.col(ip)));
511       double grad(gradx+grady+gradz);
512 
513       // Store values
514       lapl(0,ip)=2.0*(lap+grad);
515     }
516   }
517 }
518 
update_density(const arma::mat & Pa0,const arma::mat & Pb0)519 void AngularGrid::update_density(const arma::mat & Pa0, const arma::mat & Pb0) {
520   if(!Pa0.n_elem || !Pb0.n_elem) {
521     ERROR_INFO();
522     throw std::runtime_error("Error - density matrix is empty!\n");
523   }
524 
525   // Polarized calculation.
526   polarized=true;
527 
528   // Update density vector
529   arma::mat Pa(Pa0.submat(bf_ind,bf_ind));
530   arma::mat Pb(Pb0.submat(bf_ind,bf_ind));
531 
532   Pav=Pa*bf;
533   Pbv=Pb*bf;
534 
535   // Calculate density
536   rho.zeros(2,grid.size());
537   for(size_t ip=0;ip<grid.size();ip++) {
538     rho(0,ip)=arma::dot(Pav.col(ip),bf.col(ip));
539     rho(1,ip)=arma::dot(Pbv.col(ip),bf.col(ip));
540 
541     /*
542     double na=compute_density(Pa0,*basp,grid[ip].r);
543     double nb=compute_density(Pb0,*basp,grid[ip].r);
544     if(fabs(da-na)>1e-6 || fabs(db-nb)>1e-6)
545       printf("Density at point % .3f % .3f % .3f: %e vs %e, %e vs %e\n",grid[ip].r.x,grid[ip].r.y,grid[ip].r.z,da,na,db,nb);
546     */
547   }
548 
549   // Calculate gradient
550   if(do_grad) {
551     grho.zeros(6,grid.size());
552     sigma.zeros(3,grid.size());
553     for(size_t ip=0;ip<grid.size();ip++) {
554       double gax=grho(0,ip)=2.0*arma::dot(Pav.col(ip),bf_x.col(ip));
555       double gay=grho(1,ip)=2.0*arma::dot(Pav.col(ip),bf_y.col(ip));
556       double gaz=grho(2,ip)=2.0*arma::dot(Pav.col(ip),bf_z.col(ip));
557 
558       double gbx=grho(3,ip)=2.0*arma::dot(Pbv.col(ip),bf_x.col(ip));
559       double gby=grho(4,ip)=2.0*arma::dot(Pbv.col(ip),bf_y.col(ip));
560       double gbz=grho(5,ip)=2.0*arma::dot(Pbv.col(ip),bf_z.col(ip));
561 
562       // Compute sigma as well
563       sigma(0,ip)=gax*gax + gay*gay + gaz*gaz;
564       sigma(1,ip)=gax*gbx + gay*gby + gaz*gbz;
565       sigma(2,ip)=gbx*gbx + gby*gby + gbz*gbz;
566     }
567   }
568 
569   // Calculate laplacian and kinetic energy density
570   if(do_tau || do_lapl) {
571     // Adjust size of grid
572     if(do_lapl)
573       lapl.zeros(2,grid.size());
574     if(do_tau)
575       tau.resize(2,grid.size());
576 
577     // Update helpers
578     Pav_x=Pa*bf_x;
579     Pav_y=Pa*bf_y;
580     Pav_z=Pa*bf_z;
581 
582     Pbv_x=Pb*bf_x;
583     Pbv_y=Pb*bf_y;
584     Pbv_z=Pb*bf_z;
585 
586     // Calculate values
587     if(do_tau && do_lapl) {
588       for(size_t ip=0;ip<grid.size();ip++) {
589 	// Laplacian term
590 	double lapa=arma::dot(Pav.col(ip),bf_lapl.col(ip));
591 	double lapb=arma::dot(Pbv.col(ip),bf_lapl.col(ip));
592 	// Gradient term
593 	double gradax=arma::dot(Pav_x.col(ip),bf_x.col(ip));
594 	double graday=arma::dot(Pav_y.col(ip),bf_y.col(ip));
595 	double gradaz=arma::dot(Pav_z.col(ip),bf_z.col(ip));
596 	double grada(gradax+graday+gradaz);
597 
598 	double gradbx=arma::dot(Pbv_x.col(ip),bf_x.col(ip));
599 	double gradby=arma::dot(Pbv_y.col(ip),bf_y.col(ip));
600 	double gradbz=arma::dot(Pbv_z.col(ip),bf_z.col(ip));
601 	double gradb(gradbx+gradby+gradbz);
602 
603 	// Store values
604 	lapl(0,ip)=2.0*(lapa+grada);
605 	lapl(1,ip)=2.0*(lapb+gradb);
606 	tau(0,ip)=0.5*grada;
607 	tau(1,ip)=0.5*gradb;
608       }
609     } else if(do_tau) {
610       for(size_t ip=0;ip<grid.size();ip++) {
611 	// Gradient term
612 	double gradax=arma::dot(Pav_x.col(ip),bf_x.col(ip));
613 	double graday=arma::dot(Pav_y.col(ip),bf_y.col(ip));
614 	double gradaz=arma::dot(Pav_z.col(ip),bf_z.col(ip));
615 	double grada(gradax+graday+gradaz);
616 
617 	double gradbx=arma::dot(Pbv_x.col(ip),bf_x.col(ip));
618 	double gradby=arma::dot(Pbv_y.col(ip),bf_y.col(ip));
619 	double gradbz=arma::dot(Pbv_z.col(ip),bf_z.col(ip));
620 	double gradb(gradbx+gradby+gradbz);
621 
622 	// Store values
623 	tau(0,ip)=0.5*grada;
624 	tau(1,ip)=0.5*gradb;
625       }
626     } else if(do_lapl) {
627       for(size_t ip=0;ip<grid.size();ip++) {
628 	// Laplacian term
629 	double lapa=arma::dot(Pav.col(ip),bf_lapl.col(ip));
630 	double lapb=arma::dot(Pbv.col(ip),bf_lapl.col(ip));
631 	// Gradient term
632 	double gradax=arma::dot(Pav_x.col(ip),bf_x.col(ip));
633 	double graday=arma::dot(Pav_y.col(ip),bf_y.col(ip));
634 	double gradaz=arma::dot(Pav_z.col(ip),bf_z.col(ip));
635 	double grada(gradax+graday+gradaz);
636 
637 	double gradbx=arma::dot(Pbv_x.col(ip),bf_x.col(ip));
638 	double gradby=arma::dot(Pbv_y.col(ip),bf_y.col(ip));
639 	double gradbz=arma::dot(Pbv_z.col(ip),bf_z.col(ip));
640 	double gradb(gradbx+gradby+gradbz);
641 
642 	// Store values
643 	lapl(0,ip)=2.0*(lapa+grada);
644 	lapl(1,ip)=2.0*(lapb+gradb);
645       }
646     }
647   }
648 }
649 
update_density(const arma::cx_vec & C0)650 void AngularGrid::update_density(const arma::cx_vec & C0) {
651   // Update values of densitty
652 
653   if(!C0.n_elem) {
654     ERROR_INFO();
655     throw std::runtime_error("Error - coefficient vector is empty!\n");
656   }
657 
658   // Polarized calculation.
659   polarized=true;
660 
661   // Compute value of orbital
662   arma::cx_vec C(bf_ind.n_elem);
663   for(size_t i=0;i<bf_ind.n_elem;i++)
664     C(i)=C0(bf_ind(i));
665 
666   arma::cx_rowvec Cv=arma::strans(C)*bf;
667   // Store densities
668   rho.zeros(2,grid.size());
669   for(size_t ip=0;ip<grid.size();ip++)
670     // Compute densities
671     rho(0,ip)=std::norm(Cv(ip));
672 
673   if(do_grad) {
674     grho.zeros(6,grid.size());
675     sigma.zeros(3,grid.size());
676 
677     // Compute orbital gradient
678     arma::cx_rowvec Cv_x=arma::strans(C)*bf_x;
679     arma::cx_rowvec Cv_y=arma::strans(C)*bf_y;
680     arma::cx_rowvec Cv_z=arma::strans(C)*bf_z;
681 
682     // Gradient is
683     for(size_t ip=0;ip<grid.size();ip++) {
684       grho(0,ip)=2.0*std::real(Cv_x(ip) * std::conj(Cv(ip)));
685       grho(1,ip)=2.0*std::real(Cv_y(ip) * std::conj(Cv(ip)));
686       grho(2,ip)=2.0*std::real(Cv_z(ip) * std::conj(Cv(ip)));
687 
688       // Compute values of sigma
689       sigma(0,ip) =std::pow(grho(0,ip),2) + std::pow(grho(1,ip),2) + std::pow(grho(2,ip),2);
690     }
691 
692     if(do_tau && do_lapl) {
693       // Adjust size of grid
694       lapl.zeros(2,grid.size());
695       tau.zeros(2,grid.size());
696 
697       // Compute orbital laplacian
698       arma::cx_rowvec Cv_lapl=arma::strans(C)*bf_lapl;
699 
700       for(size_t ip=0;ip<grid.size();ip++) {
701 	// Laplacian term
702 	double lap=std::real(Cv_lapl(ip)*std::conj(Cv(ip)));
703 	// Gradient term
704 	double grad=std::norm(Cv_x(ip)) + std::norm(Cv_y(ip)) + std::norm(Cv_z(ip));
705 
706 	// Laplacian is (including degeneracy factors)
707 	lapl(0,ip)=2.0*(lap+grad);
708 	// Kinetic energy density is
709 	tau(0,ip)=0.5*grad;
710       }
711     } else if(do_tau) {
712       // Adjust size of grid
713       tau.zeros(2,grid.size());
714 
715       for(size_t ip=0;ip<grid.size();ip++) {
716 	// Gradient term
717 	double grad=std::norm(Cv_x(ip)) + std::norm(Cv_y(ip)) + std::norm(Cv_z(ip));
718 	// Kinetic energy density is
719 	tau(0,ip)=0.5*grad;
720       }
721     } else if(do_lapl) {
722       // Adjust size of grid
723       lapl.zeros(2,grid.size());
724 
725       // Compute orbital laplacian
726       arma::cx_rowvec Cv_lapl=arma::strans(C)*bf_lapl;
727 
728       for(size_t ip=0;ip<grid.size();ip++) {
729 	// Laplacian term
730 	double lap=std::real(Cv_lapl(ip)*std::conj(Cv(ip)));
731 	// Gradient term
732 	double grad=std::norm(Cv_x(ip)) + std::norm(Cv_y(ip)) + std::norm(Cv_z(ip));
733 
734 	// Laplacian is (including degeneracy factors)
735 	lapl(0,ip)=2.0*(lap+grad);
736       }
737     }
738   }
739 }
740 
get_density(std::vector<dens_list_t> & list) const741 void AngularGrid::get_density(std::vector<dens_list_t> & list) const {
742   if(polarized) {
743     ERROR_INFO();
744     throw std::runtime_error("get_density() is supposed to be called with a non-polarized grid!\n");
745   }
746 
747   for(size_t ip=0;ip<grid.size();ip++) {
748     dens_list_t hlp;
749     hlp.d=rho(0,ip);
750     hlp.w=w(ip);
751     list.push_back(hlp);
752   }
753 }
754 
compute_Nel() const755 double AngularGrid::compute_Nel() const {
756   double nel=0.0;
757 
758   if(!polarized)
759     for(size_t ip=0;ip<grid.size();ip++)
760       nel+=w(ip)*rho(0,ip);
761   else
762     for(size_t ip=0;ip<grid.size();ip++)
763       nel+=w(ip)*(rho(0,ip)+rho(1,ip));
764 
765   return nel;
766 }
767 
print_grid() const768 void AngularGrid::print_grid() const {
769   for(size_t ip=0;ip<grid.size();ip++)
770     printf("%5i % f % f % f %e\n",(int) ip+1,grid[ip].r.x,grid[ip].r.y,grid[ip].r.z,grid[ip].w);
771 }
772 
init_xc()773 void AngularGrid::init_xc() {
774   // Size of grid.
775   const size_t N=grid.size();
776 
777   // Zero energy
778   zero_Exc();
779 
780   if(!polarized) {
781     // Restricted case
782     vxc.zeros(1,N);
783     if(do_grad)
784       vsigma.zeros(1,N);
785     if(do_tau)
786       vtau.zeros(1,N);
787     if(do_lapl)
788       vlapl.zeros(1,N);
789   } else {
790     // Unrestricted case
791     vxc.zeros(2,N);
792     if(do_grad)
793       vsigma.zeros(3,N);
794     if(do_tau)
795       vtau.zeros(2,N);
796     if(do_lapl) {
797       vlapl.zeros(2,N);
798     }
799   }
800 
801   // Initial values
802   do_gga=false;
803   do_mgga_l=false;
804   do_mgga_t=false;
805 }
806 
zero_Exc()807 void AngularGrid::zero_Exc() {
808   exc.zeros(grid.size());
809 }
810 
check_xc()811 void AngularGrid::check_xc() {
812   size_t nan=0;
813 
814   for(arma::uword i=0;i<exc.n_elem;i++)
815     if(std::isnan(exc[i])) {
816       nan++;
817       exc[i]=0.0;
818     }
819 
820   for(arma::uword i=0;i<vxc.n_elem;i++)
821     if(std::isnan(vxc[i])) {
822       nan++;
823       vxc[i]=0.0;
824     }
825 
826   for(arma::uword i=0;i<vsigma.n_elem;i++)
827     if(std::isnan(vsigma[i])) {
828       nan++;
829       vsigma[i]=0.0;
830     }
831 
832   for(arma::uword i=0;i<vlapl.n_elem;i++)
833     if(std::isnan(vlapl[i])) {
834       nan++;
835       vlapl[i]=0.0;
836     }
837 
838 
839   for(arma::uword i=0;i<vtau.n_elem;i++)
840     if(std::isnan(vtau[i])) {
841       nan++;
842       vtau[i]=0.0;
843     }
844 
845   if(nan) {
846     printf("Warning - %i NaNs found in xc energy / potential.\n",(int) nan);
847   }
848 }
849 
check_array(const std::vector<double> & x,size_t n,std::vector<size_t> & idx)850 void check_array(const std::vector<double> & x, size_t n, std::vector<size_t> & idx) {
851   if(x.size()%n!=0) {
852     ERROR_INFO();
853     std::ostringstream oss;
854     oss << "Size of array " << x.size() << " is not divisible by " << n << "!\n";
855     throw std::runtime_error(oss.str());
856   }
857 
858   for(size_t i=0;i<x.size()/n;i++) {
859     // Check for failed entry
860     bool fail=false;
861     for(size_t j=0;j<n;j++)
862       if(!std::isfinite(x[i*n+j]))
863 	fail=true;
864 
865     // If failed i is not in the list, add it
866     if(fail) {
867       if (!std::binary_search (idx.begin(), idx.end(), i)) {
868 	idx.push_back(i);
869 	std::sort(idx.begin(),idx.end());
870       }
871     }
872   }
873 }
874 
compute_xc(int func_id,bool pot)875 void AngularGrid::compute_xc(int func_id, bool pot) {
876   // Compute exchange-correlation functional
877 
878   // Which functional is in question?
879   bool gga, mgga_t, mgga_l;
880   is_gga_mgga(func_id,gga,mgga_t,mgga_l);
881 
882   // Update controlling flags for eval_Fxc (exchange and correlation
883   // parts might be of different type)
884   do_gga=do_gga || gga || mgga_t || mgga_l;
885   do_mgga_t=do_mgga_t || mgga_t;
886   do_mgga_l=do_mgga_l || mgga_l;
887 
888   // Amount of grid points
889   const size_t N=grid.size();
890 
891   // Work arrays - exchange and correlation are computed separately
892   arma::vec exc_wrk;
893   arma::mat vxc_wrk;
894   arma::mat vsigma_wrk;
895   arma::mat vlapl_wrk;
896   arma::mat vtau_wrk;
897 
898   if(has_exc(func_id))
899     exc_wrk.zeros(exc.n_elem);
900   if(pot) {
901     vxc_wrk.zeros(vxc.n_rows,vxc.n_cols);
902     if(gga || mgga_t || mgga_l)
903       vsigma_wrk.zeros(vsigma.n_rows,vsigma.n_cols);
904     if(mgga_t)
905       vtau_wrk.zeros(vtau.n_rows,vtau.n_cols);
906     if(mgga_l)
907       vlapl_wrk.zeros(vlapl.n_rows,vlapl.n_cols);
908   }
909 
910   // Spin variable for libxc
911   int nspin;
912   if(!polarized)
913     nspin=XC_UNPOLARIZED;
914   else
915     nspin=XC_POLARIZED;
916 
917   // Initialize libxc worker
918   xc_func_type func;
919   if(xc_func_init(&func, func_id, nspin) != 0) {
920     ERROR_INFO();
921     std::ostringstream oss;
922     oss << "Functional "<<func_id<<" not found!";
923     throw std::runtime_error(oss.str());
924   }
925   // Set parameters
926   arma::vec pars;
927   std::string functype;
928   if(is_exchange(func_id)) {
929     pars=settings.get_vec("DFTXpars");
930     functype="exchange";
931   } else if(is_correlation(func_id)) {
932     pars=settings.get_vec("DFTCpars");
933     functype="correlation";
934   }
935   if(pars.n_elem) {
936     size_t npars = xc_func_info_get_n_ext_params((xc_func_info_type*) func.info);
937     if(npars != pars.n_elem) {
938       std::ostringstream oss;
939       oss << "Inconsistent number of parameters for the " << functype << " functional.\n";
940       oss << "Expected " << npars << ", got " << pars.n_elem << ".\n";
941       throw std::logic_error(oss.str());
942     }
943     xc_func_set_ext_params(&func, pars.memptr());
944   }
945 
946   // Evaluate functionals.
947   if(has_exc(func_id)) {
948     if(pot) {
949       if(mgga_t || mgga_l) {// meta-GGA
950 	double * laplp = mgga_t ? lapl.memptr() : NULL;
951 	double * taup = mgga_t ? tau.memptr() : NULL;
952 	double * vlaplp = mgga_t ? vlapl_wrk.memptr() : NULL;
953 	double * vtaup = mgga_t ? vtau_wrk.memptr() : NULL;
954 	xc_mgga_exc_vxc(&func, N, rho.memptr(), sigma.memptr(), laplp, taup, exc_wrk.memptr(), vxc_wrk.memptr(), vsigma_wrk.memptr(), vlaplp, vtaup);
955       } else if(gga) // GGA
956 	xc_gga_exc_vxc(&func, N, rho.memptr(), sigma.memptr(), exc_wrk.memptr(), vxc_wrk.memptr(), vsigma_wrk.memptr());
957       else // LDA
958 	xc_lda_exc_vxc(&func, N, rho.memptr(), exc_wrk.memptr(), vxc_wrk.memptr());
959     } else {
960       if(mgga_t || mgga_l) { // meta-GGA
961 	double * laplp = mgga_t ? lapl.memptr() : NULL;
962 	double * taup = mgga_t ? tau.memptr() : NULL;
963 	xc_mgga_exc(&func, N, rho.memptr(), sigma.memptr(), laplp, taup, exc_wrk.memptr());
964       } else if(gga) // GGA
965 	xc_gga_exc(&func, N, rho.memptr(), sigma.memptr(), exc_wrk.memptr());
966       else // LDA
967 	xc_lda_exc(&func, N, rho.memptr(), exc_wrk.memptr());
968     }
969 
970   } else {
971     if(pot) {
972       if(mgga_t || mgga_l) { // meta-GGA
973 	double * laplp = mgga_t ? lapl.memptr() : NULL;
974 	double * taup = mgga_t ? tau.memptr() : NULL;
975 	double * vlaplp = mgga_t ? vlapl_wrk.memptr() : NULL;
976 	double * vtaup = mgga_t ? vtau_wrk.memptr() : NULL;
977 	xc_mgga_vxc(&func, N, rho.memptr(), sigma.memptr(), laplp, taup, vxc_wrk.memptr(), vsigma_wrk.memptr(), vlaplp, vtaup);
978       } else if(gga) // GGA
979 	xc_gga_vxc(&func, N, rho.memptr(), sigma.memptr(), vxc_wrk.memptr(), vsigma_wrk.memptr());
980       else // LDA
981 	xc_lda_vxc(&func, N, rho.memptr(), vxc_wrk.memptr());
982     }
983   }
984 
985   // Sum to total arrays containing both exchange and correlation
986   if(has_exc(func_id))
987     exc+=exc_wrk;
988   if(pot) {
989     if(mgga_l)
990       vlapl+=vlapl_wrk;
991     if(mgga_t)
992       vtau+=vtau_wrk;
993     if(mgga_t || mgga_l || gga)
994       vsigma+=vsigma_wrk;
995     vxc+=vxc_wrk;
996   }
997 
998   // Free functional
999   xc_func_end(&func);
1000 }
1001 
init_VV10(double b,double C,bool pot)1002 void AngularGrid::init_VV10(double b, double C, bool pot) {
1003   if(!do_grad)
1004     throw std::runtime_error("Invalid do_grad setting for VV10!\n");
1005   do_gga=true;
1006   do_mgga_t=false;
1007   do_mgga_l=false;
1008   VV10_thr=1e-8;
1009 
1010   if(rho.size() != grid.size()) {
1011     ERROR_INFO();
1012     std::ostringstream oss;
1013     oss << "Grid size is " << grid.size() << " but there are only " << rho.size() << " density values!\n";
1014     throw std::runtime_error(oss.str());
1015   }
1016   if(sigma.size() != grid.size()) {
1017     ERROR_INFO();
1018     std::ostringstream oss;
1019     oss << "Grid size is " << grid.size() << " but there are only " << sigma.size() << " reduced gradient values!\n";
1020     throw std::runtime_error(oss.str());
1021   }
1022   if(b <= 0.0 || C <= 0.0) {
1023     ERROR_INFO();
1024     std::ostringstream oss;
1025     oss << "VV10 parameters b = " << b << ", C = " << C << " are not valid.\n";
1026     throw std::runtime_error(oss.str());
1027   }
1028 
1029   if(pot) {
1030     // Plug in the constant energy density
1031     double beta=1.0/32.0 * std::pow(3.0/(b*b), 3.0/4.0);
1032     for(size_t i=0;i<grid.size();i++) {
1033       exc(i)+=beta;
1034       vxc(0,i)+=beta;
1035     }
1036   }
1037 }
1038 
VV10_Kernel(const arma::mat & xc,const arma::mat & nl,arma::mat & ret)1039 void VV10_Kernel(const arma::mat & xc, const arma::mat & nl, arma::mat & ret) {
1040   // Input arrays contain grid[i].r, omega0(i), kappa(i) (and grid[i].w, rho[i] for nl)
1041   // Return array contains: nPhi, U, and W
1042 
1043   if(xc.n_rows !=5) {
1044     ERROR_INFO();
1045     throw std::runtime_error("xc matrix has the wrong size.\n");
1046   }
1047   if(nl.n_rows !=7) {
1048     ERROR_INFO();
1049     throw std::runtime_error("nl matrix has the wrong size.\n");
1050   }
1051   if(ret.n_cols != xc.n_cols || ret.n_rows != 3) {
1052     throw std::runtime_error("Error - invalid size output array!\n");
1053   }
1054 
1055   // Loop
1056   for(size_t i=0;i<xc.n_cols;i++) {
1057     double nPhi=0.0, U=0.0, W=0.0;
1058 
1059     for(size_t j=0;j<nl.n_cols;j++) {
1060       // Distance between the grid points
1061       double dx=xc(0,i)-nl(0,j);
1062       double dy=xc(1,i)-nl(1,j);
1063       double dz=xc(2,i)-nl(2,j);
1064       double Rsq=dx*dx + dy*dy + dz*dz;
1065 
1066       // g factors
1067       double gi=xc(3,i)*Rsq + xc(4,i);
1068       double gj=nl(3,j)*Rsq + nl(4,j);
1069       // Sum of the factors
1070       double gs=gi+gj;
1071       // Reciprocal sum
1072       double rgis=1.0/gi + 1.0/gs;
1073 
1074       // Integral kernel
1075       double Phi = - 3.0 / ( 2.0 * gi * gj * gs);
1076       // Absorb grid point weight and density into kernel
1077       Phi *= nl(5,j) * nl(6,j);
1078 
1079       // Increment nPhi
1080       nPhi += Phi;
1081       // Increment U
1082       U    -= Phi * rgis;
1083       // Increment W
1084       W    -= Phi * rgis * Rsq;
1085     }
1086 
1087     // Store output
1088     ret(0,i)+=nPhi;
1089     ret(1,i)+=U;
1090     ret(2,i)+=W;
1091   }
1092 }
1093 
VV10_Kernel_F(const arma::mat & xc,const arma::mat & nl,arma::mat & ret)1094 void VV10_Kernel_F(const arma::mat & xc, const arma::mat & nl, arma::mat & ret) {
1095   // Input arrays contain grid[i].r, omega0(i), kappa(i) (and grid[i].w, rho[i] for nl)
1096   // Return array contains: nPhi, U, W, and fx, fy, fz
1097 
1098   if(xc.n_rows !=5) {
1099     ERROR_INFO();
1100     throw std::runtime_error("xc matrix has the wrong size.\n");
1101   }
1102   if(nl.n_rows !=7) {
1103     ERROR_INFO();
1104     throw std::runtime_error("nl matrix has the wrong size.\n");
1105   }
1106   if(ret.n_cols != xc.n_cols || ret.n_rows != 6) {
1107     throw std::runtime_error("Error - invalid size output array!\n");
1108   }
1109 
1110   // Loop
1111   for(size_t i=0;i<xc.n_cols;i++) {
1112     double nPhi=0.0, U=0.0, W=0.0;
1113     double fpx=0.0, fpy=0.0, fpz=0.0;
1114 
1115     for(size_t j=0;j<nl.n_cols;j++) {
1116       // Distance between the grid points
1117       double dx=xc(0,i)-nl(0,j);
1118       double dy=xc(1,i)-nl(1,j);
1119       double dz=xc(2,i)-nl(2,j);
1120       double Rsq=dx*dx + dy*dy + dz*dz;
1121 
1122       // g factors
1123       double gi=xc(3,i)*Rsq + xc(4,i);
1124       double gj=nl(3,j)*Rsq + nl(4,j);
1125       // Sum of the factors
1126       double gs=gi+gj;
1127       // Reciprocal sum
1128       double rgis=1.0/gi + 1.0/gs;
1129 
1130       // Integral kernel
1131       double Phi = - 3.0 / ( 2.0 * gi * gj * gs);
1132       // Absorb grid point weight and density into kernel
1133       Phi *= nl(5,j) * nl(6,j);
1134 
1135       // Increment nPhi
1136       nPhi += Phi;
1137       // Increment U
1138       U    -= Phi * rgis;
1139       // Increment W
1140       W    -= Phi * rgis * Rsq;
1141 
1142       // Q factor
1143       double Q = -2.0 * Phi * (xc(3,i)/gi + nl(3,j)/gj + (xc(3,i)+nl(3,j))/gs );
1144       // Increment force
1145       fpx += Q * dx;
1146       fpy += Q * dy;
1147       fpz += Q * dz;
1148     }
1149 
1150     // Store output
1151     ret(0,i)+=nPhi;
1152     ret(1,i)+=U;
1153     ret(2,i)+=W;
1154     ret(3,i)+=fpx;
1155     ret(4,i)+=fpy;
1156     ret(5,i)+=fpz;
1157   }
1158 }
1159 
collect_VV10(arma::mat & data,std::vector<size_t> & idx,double b,double C,bool nl) const1160 void AngularGrid::collect_VV10(arma::mat & data, std::vector<size_t> & idx, double b, double C, bool nl) const {
1161   if(polarized) {
1162     ERROR_INFO();
1163     throw std::runtime_error("collect_VV10 can only be called in a non-polarized grid!\n");
1164   }
1165 
1166   // Create list of points with significant densities
1167   idx.clear();
1168   for(size_t i=0;i<grid.size();i++)
1169     if(rho(0,i) >= VV10_thr)
1170       idx.push_back(i);
1171 
1172   // Create input datas
1173   if(nl)
1174     data.zeros(7,idx.size());
1175   else
1176     data.zeros(5,idx.size());
1177 
1178   // Constants for omega and kappa
1179   const double oc=4.0*M_PI/3.0;
1180   const double kc=(3.0*M_PI*b)/2.0*std::pow(9.0*M_PI,-1.0/6.0);
1181   for(size_t ii=0;ii<idx.size();ii++) {
1182     size_t i=idx[ii];
1183     data(0,ii)=grid[i].r.x;
1184     data(1,ii)=grid[i].r.y;
1185     data(2,ii)=grid[i].r.z;
1186     // omega_0[i]
1187     data(3,ii)=sqrt(C * std::pow(sigma(0,i)/(rho(0,i)*rho(0,i)),2) + oc*rho(0,i));
1188     // kappa[i]
1189     data(4,ii)=kc * cbrt(sqrt(rho(0,i)));
1190   }
1191   if(nl) {
1192     for(size_t ii=0;ii<idx.size();ii++) {
1193       size_t i=idx[ii];
1194       data(5,ii)=w(i);
1195       data(6,ii)=rho(0,i);
1196     }
1197   }
1198 }
1199 
compute_VV10(const std::vector<arma::mat> & nldata,double b,double C)1200 void AngularGrid::compute_VV10(const std::vector<arma::mat> & nldata, double b, double C) {
1201   if(polarized) {
1202     ERROR_INFO();
1203     throw std::runtime_error("compute_VV10 should be run in non-polarized mode!\n");
1204   }
1205 
1206   Timer t;
1207 
1208   // Create list of points with significant densities and input data
1209   std::vector<size_t> idx;
1210   arma::mat xc;
1211   collect_VV10(xc, idx, b, C, false);
1212 
1213   /*
1214   for(size_t i=0;i<xc.n_rows;i++) {
1215     printf("% .3f % .3f % .3f",grid[i].r.x,grid[i].r.y,grid[i].r.z);
1216     for(size_t j=0;j<xc.n_cols;j++)
1217       printf(" % e",xc(i,j));
1218     printf("\n");
1219   }
1220   */
1221 
1222   // Calculate integral kernel
1223   VV10_arr.zeros(3,xc.n_cols);
1224   for(size_t i=0;i<nldata.size();i++)
1225     VV10_Kernel(xc,nldata[i],VV10_arr);
1226 
1227   // Collect data
1228   for(size_t ii=0;ii<idx.size();ii++) {
1229     // Index of grid point is
1230     size_t i=idx[ii];
1231 
1232     // Increment the energy density
1233     exc[i] += 0.5 * VV10_arr(0,ii);
1234 
1235     // Increment LDA and GGA parts of potential.
1236     double ri=rho(0,i);
1237     double ri4=std::pow(ri,4);
1238     double si=sigma(0,i);
1239     double w0=xc(3,ii);
1240     double dkdn  = xc(4,ii)/(6.0*ri); // d kappa / d n
1241     double dw0ds = C*si / ( w0 * ri4); // d omega0 / d sigma
1242     double dw0dn = 2.0/w0 * ( M_PI/3.0 - C*si*si / (ri*ri4)); // d omega0 / d n
1243     vxc(0,i) += VV10_arr(0,ii) + ri *( dkdn * VV10_arr(1,ii) + dw0dn * VV10_arr(2,ii));
1244     vsigma(0,i) += ri * dw0ds * VV10_arr(2,ii);
1245   }
1246 
1247   /*
1248   for(size_t i=0;i<VV10_arr.n_rows;i++) {
1249     printf("% .3f % .3f % .3f",grid[i].r.x,grid[i].r.y,grid[i].r.z);
1250     for(size_t j=0;j<VV10_arr.n_cols;j++)
1251       printf(" % e",VV10_arr(i,j));
1252     printf("\n");
1253   }
1254   */
1255 }
1256 
compute_VV10_F(const std::vector<arma::mat> & nldata,const std::vector<angshell_t> & nlgrids,double b,double C)1257 arma::vec AngularGrid::compute_VV10_F(const std::vector<arma::mat> & nldata, const std::vector<angshell_t> & nlgrids, double b, double C) {
1258   if(polarized) {
1259     ERROR_INFO();
1260     throw std::runtime_error("compute_VV10_F should be run in non-polarized mode!\n");
1261   }
1262 
1263   // Create list of points with significant densities and input data
1264   std::vector<size_t> idx;
1265   arma::mat xc;
1266   collect_VV10(xc, idx, b, C, false);
1267 
1268   // Calculate integral kernels
1269   VV10_arr.zeros(6,xc.n_cols);
1270   for(size_t i=0;i<nldata.size();i++)
1271     if(nlgrids[i].atind==info.atind) {
1272       // No Q contribution!
1273       //VV10_Kernel(xc,nldata[i],VV10_arr.cols(0,2));
1274 
1275       arma::mat Kat(3,xc.n_rows);
1276       Kat.zeros();
1277       VV10_Kernel(xc,nldata[i],Kat);
1278       VV10_arr.rows(0,2)+=Kat;
1279     } else
1280       // Full contribution
1281       VV10_Kernel_F(xc,nldata[i],VV10_arr);
1282 
1283   // Evaluate force contribution
1284   double fx=0.0, fy=0.0, fz=0.0;
1285 
1286   for(size_t ii=0;ii<idx.size();ii++) {
1287     // Index of grid point is
1288     size_t i=idx[ii];
1289 
1290     // Increment the energy density
1291     exc[i] += 0.5 * VV10_arr(0,ii);
1292 
1293     // Increment LDA and GGA parts of potential.
1294     double ri=rho(0,i);
1295     double ri4=std::pow(ri,4);
1296     double si=sigma(0,i);
1297     double w0=xc(3,ii);
1298     double dkdn  = xc(4,ii)/(6.0*ri); // d kappa / d n
1299     double dw0ds = C*si / ( w0 * ri4); // d omega0 / d sigma
1300     double dw0dn = 2.0/w0 * ( M_PI/3.0 - C*si*si / (ri*ri4)); // d omega0 / d n
1301     vxc(0,i) += VV10_arr(0,ii) + ri *( dkdn * VV10_arr(1,ii) + dw0dn * VV10_arr(2,ii));
1302     vsigma(0,i) += ri * dw0ds * VV10_arr(2,ii);
1303 
1304     // Increment total force
1305     fx += grid[i].w*rho(0,i)*VV10_arr(3,ii);
1306     fy += grid[i].w*rho(0,i)*VV10_arr(4,ii);
1307     fz += grid[i].w*rho(0,i)*VV10_arr(5,ii);
1308   }
1309 
1310   arma::vec F(3);
1311   F(0)=fx;
1312   F(1)=fy;
1313   F(2)=fz;
1314 
1315   return F;
1316 }
1317 
print_density(FILE * f) const1318 void AngularGrid::print_density(FILE *f) const {
1319   // Loop over grid points
1320   for(size_t i=0;i<grid.size();i++) {
1321     // Get data in point
1322     libxc_dens_t d=get_dens(i);
1323 
1324     // Print out data
1325     fprintf(f,"% .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e\n",d.rhoa,d.rhob,d.sigmaaa,d.sigmaab,d.sigmabb,d.lapla,d.laplb,d.taua,d.taub);
1326   }
1327 }
1328 
print_potential(int func_id,FILE * f) const1329 void AngularGrid::print_potential(int func_id, FILE *f) const {
1330   // Loop over grid points
1331   for(size_t i=0;i<grid.size();i++) {
1332     // Get data in point
1333     libxc_pot_t d=get_pot(i);
1334     double e=exc(i);
1335 
1336     int nspin;
1337     if(polarized)
1338       nspin=XC_POLARIZED;
1339     else
1340       nspin=XC_POLARIZED;
1341 
1342     // Print out data
1343     fprintf(f, "%3i %2i % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e\n",func_id,nspin,e,d.vrhoa,d.vrhob,d.vsigmaaa,d.vsigmaab,d.vsigmabb,d.vlapla,d.vlaplb,d.vtaua,d.vtaub);
1344   }
1345 }
1346 
check_potential(FILE * f) const1347 void AngularGrid::check_potential(FILE *f) const {
1348   // Loop over grid points
1349   for(size_t i=0;i<grid.size();i++) {
1350     // Get data in point
1351     libxc_pot_t v=get_pot(i);
1352     double e=exc(i);
1353     if(std::isnan(e) || std::isnan(v.vrhoa) || std::isnan(v.vrhob) || std::isnan(v.vsigmaaa) || std::isnan(v.vsigmaab) || std::isnan(v.vsigmabb) || std::isnan(v.vlapla) || std::isnan(v.vlaplb) || std::isnan(v.vtaua) || std::isnan(v.vtaub)) {
1354       libxc_dens_t d=get_dens(i);
1355       fprintf(f,"***\n");
1356       fprintf(f,"% .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e\n",d.rhoa,d.rhob,d.sigmaaa,d.sigmaab,d.sigmabb,d.lapla,d.laplb,d.taua,d.taub);
1357       fprintf(f,"% .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e % .16e\n",e,v.vrhoa,v.vrhob,v.vsigmaaa,v.vsigmaab,v.vsigmabb,v.vlapla,v.vlaplb,v.vtaua,v.vtaub);
1358     }
1359   }
1360   fflush(f);
1361 }
1362 
get_weights()1363 void AngularGrid::get_weights() {
1364   if(!grid.size())
1365     return;
1366   w.zeros(grid.size());
1367   for(size_t pi=0;pi<grid.size();pi++)
1368     w(pi)=grid[pi].w;
1369 }
1370 
get_dens(size_t idx) const1371 libxc_dens_t AngularGrid::get_dens(size_t idx) const {
1372   libxc_dens_t ret;
1373 
1374   // Alpha and beta density
1375   ret.rhoa=0.0;
1376   ret.rhob=0.0;
1377 
1378   // Sigma variables
1379   ret.sigmaaa=0.0;
1380   ret.sigmaab=0.0;
1381   ret.sigmabb=0.0;
1382 
1383   // Laplacians
1384   ret.lapla=0.0;
1385   ret.laplb=0.0;
1386 
1387   // Kinetic energy density
1388   ret.taua=0.0;
1389   ret.taub=0.0;
1390 
1391   if(polarized) {
1392     ret.rhoa=rho(0,idx);
1393     ret.rhob=rho(1,idx);
1394   } else {
1395     ret.rhoa=ret.rhob=rho(0,idx)/2.0;
1396   }
1397 
1398   if(do_gga) {
1399     if(polarized) {
1400       ret.sigmaaa=sigma(0,idx);
1401       ret.sigmaab=sigma(1,idx);
1402       ret.sigmabb=sigma(2,idx);
1403     } else {
1404       ret.sigmaaa=ret.sigmaab=ret.sigmabb=sigma(0,idx)/4.0;
1405     }
1406   }
1407 
1408   if(do_mgga_t) {
1409     if(polarized) {
1410       ret.taua=tau(0,idx);
1411       ret.taub=tau(1,idx);
1412     } else {
1413       ret.taua=ret.taub=tau(0,idx)/2.0;
1414     }
1415   }
1416   if(do_mgga_l) {
1417     if(polarized) {
1418       ret.lapla=lapl(0,idx);
1419       ret.laplb=lapl(1,idx);
1420     } else {
1421       ret.lapla=ret.laplb=lapl(0,idx)/2.0;
1422     }
1423   }
1424 
1425   return ret;
1426 }
1427 
get_data(size_t idx) const1428 libxc_debug_t AngularGrid::get_data(size_t idx) const {
1429   libxc_debug_t d;
1430   d.dens=get_dens(idx);
1431   d.pot=get_pot(idx);
1432   d.e=exc[idx];
1433   return d;
1434 }
1435 
get_pot(size_t idx) const1436 libxc_pot_t AngularGrid::get_pot(size_t idx) const {
1437   libxc_pot_t ret;
1438 
1439   // Alpha and beta density
1440   ret.vrhoa=0.0;
1441   ret.vrhob=0.0;
1442 
1443   // Sigma variables
1444   ret.vsigmaaa=0.0;
1445   ret.vsigmaab=0.0;
1446   ret.vsigmabb=0.0;
1447 
1448   // Laplacians
1449   ret.vlapla=0.0;
1450   ret.vlaplb=0.0;
1451 
1452   // Kinetic energy density
1453   ret.vtaua=0.0;
1454   ret.vtaub=0.0;
1455 
1456   if(polarized) {
1457     ret.vrhoa=vxc(0,idx);
1458     ret.vrhob=vxc(1,idx);
1459   } else {
1460     ret.vrhoa=ret.vrhob=vxc(0,idx)/2.0;
1461   }
1462 
1463   if(do_gga) {
1464     if(polarized) {
1465       ret.vsigmaaa=vsigma(0,idx);
1466       ret.vsigmaab=vsigma(1,idx);
1467       ret.vsigmabb=vsigma(2,idx);
1468     } else {
1469       ret.vsigmaaa=ret.vsigmaab=ret.vsigmabb=vsigma(0,idx)/4.0;
1470     }
1471   }
1472 
1473   if(do_mgga_t) {
1474     if(polarized) {
1475       ret.vtaua=vtau(0,idx);
1476       ret.vtaub=vtau(1,idx);
1477     } else {
1478       ret.vtaua=ret.vtaub=vtau(0,idx)/2.0;
1479     }
1480   }
1481 
1482   if(do_mgga_l) {
1483     if(polarized) {
1484       ret.vlapla=vlapl(0,idx);
1485       ret.vlaplb=vlapl(1,idx);
1486     } else {
1487       ret.vlapla=ret.vlaplb=vlapl(0,idx)/2.0;
1488     }
1489   }
1490 
1491   return ret;
1492 }
1493 
eval_Exc() const1494 double AngularGrid::eval_Exc() const {
1495   arma::uvec screen(screen_density());
1496 
1497   arma::rowvec dens(rho.row(0));
1498   if(polarized)
1499     dens+=rho.row(1);
1500 
1501   return arma::sum(w(screen)%exc(screen)%dens(screen));
1502 }
1503 
eval_overlap(arma::mat & So) const1504 void AngularGrid::eval_overlap(arma::mat & So) const {
1505   // Calculate in subspace
1506   arma::mat S(bf_ind.n_elem,bf_ind.n_elem);
1507   S.zeros();
1508   increment_lda<double>(S,w,bf);
1509   // Increment
1510   So.submat(bf_ind,bf_ind)+=S;
1511 }
1512 
eval_diag_overlap(arma::vec & S) const1513 void AngularGrid::eval_diag_overlap(arma::vec & S) const {
1514   S.zeros(pot_bf_ind.n_elem);
1515 
1516   arma::mat bft(bf.t());
1517   for(size_t j=0;j<bf.n_rows;j++)
1518     S(bf_potind(j))=arma::sum(arma::square(bft.col(j))%w.t());
1519 }
1520 
calculate_rho(const arma::cx_mat & Cocc,const arma::mat & bf)1521 static arma::mat calculate_rho(const arma::cx_mat & Cocc, const arma::mat & bf) {
1522   // Transpose C to (norb,nbf)
1523   arma::cx_mat C(arma::strans(Cocc));
1524   // Calculate values of orbitals at grid points: (norb, ngrid)
1525   arma::cx_mat orbvals(C*bf);
1526 
1527   // Orbital densities at grid points (norb, ngrid)
1528   return arma::real(orbvals%arma::conj(orbvals));
1529 }
1530 
eval_overlap(const arma::cx_mat & Cocc,size_t io,double k,arma::mat & So,double thr) const1531 void AngularGrid::eval_overlap(const arma::cx_mat & Cocc, size_t io, double k, arma::mat & So, double thr) const {
1532   // Calculate in subspace
1533   arma::mat S(bf_ind.n_elem,bf_ind.n_elem);
1534   S.zeros();
1535 
1536   // Orbital densities at grid points (norb, ngrid)
1537   arma::mat orbdens(calculate_rho(Cocc.rows(bf_ind),bf));
1538 
1539   // Calculate weightings
1540   arma::rowvec ww(w);
1541   for(size_t ip=0;ip<grid.size();ip++) {
1542     // Orbital density is
1543     double rhois(orbdens(io,ip));
1544     // Total density
1545     double rhotot(arma::sum(orbdens.col(ip)));
1546 
1547     // Screen for bad behavior
1548     if(rhotot>=thr)
1549       ww(ip)*=std::pow(rhois/rhotot,k);
1550     else
1551       ww(ip)=0.0;
1552   }
1553 
1554   increment_lda<double>(S,ww,bf);
1555   // Increment
1556   So.submat(bf_ind,bf_ind)+=S;
1557 }
1558 
eval_overlap(const arma::cx_mat & Cocc,const arma::vec & Esi,double k,arma::mat & So,double thr) const1559 void AngularGrid::eval_overlap(const arma::cx_mat & Cocc, const arma::vec & Esi, double k, arma::mat & So, double thr) const {
1560   // Calculate in subspace
1561   arma::mat S(bf_ind.n_elem,bf_ind.n_elem);
1562   S.zeros();
1563 
1564   // Orbital densities at grid points (norb, ngrid)
1565   arma::mat orbdens(calculate_rho(Cocc.rows(bf_ind),bf));
1566   arma::mat orbdenspk(arma::pow(orbdens,k));
1567 
1568   // Calculate weightings
1569   arma::rowvec ww(w);
1570   for(size_t ip=0;ip<grid.size();ip++) {
1571     // Total density
1572     double rhotot(arma::sum(orbdens.col(ip)));
1573 
1574     // Screen for bad behavior
1575     if(rhotot>=thr)
1576       ww(ip)*=arma::dot(Esi,orbdenspk.col(ip))/std::pow(rhotot,k);
1577     else
1578       ww(ip)=0.0;
1579   }
1580 
1581   increment_lda<double>(S,ww,bf);
1582   // Increment
1583   So.submat(bf_ind,bf_ind)+=S;
1584 }
1585 
calculate_tau_grho_tauw(const arma::cx_mat & Cocc,const arma::mat & bf,const arma::mat & bf_x,const arma::mat & bf_y,const arma::mat & bf_z,arma::vec & tau,arma::mat & grho,arma::vec & tau_w)1586 static void calculate_tau_grho_tauw(const arma::cx_mat & Cocc, const arma::mat & bf, const arma::mat & bf_x, const arma::mat & bf_y, const arma::mat & bf_z, arma::vec & tau, arma::mat & grho, arma::vec & tau_w) {
1587   // Density matrix
1588   arma::cx_mat P(Cocc*arma::trans(Cocc));
1589   arma::cx_mat Pvec(P*bf);
1590   arma::cx_mat Pvec_x(P*bf_x);
1591   arma::cx_mat Pvec_y(P*bf_y);
1592   arma::cx_mat Pvec_z(P*bf_z);
1593 
1594   // Kinetic energy density
1595   tau.zeros(bf.n_cols);
1596   tau_w.zeros(bf.n_cols);
1597   grho.zeros(3,bf.n_cols);
1598 
1599   for(size_t ip=0;ip<tau.n_elem;ip++) {
1600     double kinx(std::real(arma::dot(Pvec_x.col(ip),bf_x.col(ip))));
1601     double kiny(std::real(arma::dot(Pvec_y.col(ip),bf_y.col(ip))));
1602     double kinz(std::real(arma::dot(Pvec_z.col(ip),bf_z.col(ip))));
1603     tau(ip)=0.5*(kinx+kiny+kinz);
1604 
1605     // Density is
1606     double n=std::real(arma::dot(Pvec.col(ip),bf.col(ip)));
1607     // Density gradient
1608     grho(0,ip)=2.0*std::real(arma::dot(Pvec.col(ip),bf_x.col(ip)));
1609     grho(1,ip)=2.0*std::real(arma::dot(Pvec.col(ip),bf_y.col(ip)));
1610     grho(2,ip)=2.0*std::real(arma::dot(Pvec.col(ip),bf_z.col(ip)));
1611     double g=arma::dot(grho.col(ip),grho.col(ip));
1612     tau_w(ip)=g/(8*n);
1613   }
1614 
1615   // Transpose grho
1616   grho=arma::trans(grho);
1617 }
1618 
eval_tau_overlap(const arma::cx_mat & Cocc,double k,arma::mat & So,double thr) const1619 void AngularGrid::eval_tau_overlap(const arma::cx_mat & Cocc, double k, arma::mat & So, double thr) const {
1620   // Calculate in subspace
1621   arma::mat S(bf_ind.n_elem,bf_ind.n_elem);
1622   S.zeros();
1623 
1624   if(!do_grad) throw std::logic_error("Must have gradients enabled to calculate tau overlap!\n");
1625 
1626   // Get kinetic and Weiszäcker kinetic energy densities
1627   arma::mat gr;
1628   arma::vec t, tw;
1629   calculate_tau_grho_tauw(Cocc.rows(bf_ind),bf,bf_x,bf_y,bf_z,t,gr,tw);
1630 
1631   // Calculate weightings
1632   arma::rowvec ww(w);
1633   for(size_t ip=0;ip<grid.size();ip++) {
1634     // Screen for bad behavior
1635     if(t(ip)>=thr)
1636       ww(ip)*=std::pow(tw(ip)/t(ip),k);
1637     else
1638       ww(ip)=0.0;
1639   }
1640 
1641   increment_lda<double>(S,ww,bf);
1642   // Increment
1643   So.submat(bf_ind,bf_ind)+=S;
1644 }
1645 
eval_tau_overlap_deriv(const arma::cx_mat & Cocc,const arma::vec & Esi,double k,arma::mat & So,double thr) const1646 void AngularGrid::eval_tau_overlap_deriv(const arma::cx_mat & Cocc, const arma::vec & Esi, double k, arma::mat & So, double thr) const {
1647   if(!do_grad) throw std::logic_error("Must have gradients enabled to calculate tau overlap!\n");
1648 
1649   // Get orbital densities
1650   arma::mat orbdens(calculate_rho(Cocc.rows(bf_ind),bf));
1651 
1652   // Get kinetic and Weiszäcker kinetic energy densities
1653   arma::mat gr;
1654   arma::vec t, tw;
1655   calculate_tau_grho_tauw(Cocc.rows(bf_ind),bf,bf_x,bf_y,bf_z,t,gr,tw);
1656 
1657   // Calculate in subspace
1658   arma::mat S(bf_ind.n_elem,bf_ind.n_elem);
1659   S.zeros();
1660 
1661   // LDA part
1662   {
1663     // Calculate weightings
1664     arma::rowvec ww(w);
1665     for(size_t ip=0;ip<grid.size();ip++) {
1666       // Screen for bad behavior
1667       if(t(ip)>=thr)
1668 	ww(ip)*=-k*std::pow(tw(ip)/t(ip),k)*(arma::dot(Esi,orbdens.col(ip))/arma::sum(orbdens.col(ip)));
1669       else
1670 	ww(ip)=0.0;
1671     }
1672 
1673     increment_lda<double>(S,ww,bf);
1674   }
1675 
1676   // meta-GGA part
1677   {
1678     // Calculate weightings
1679     arma::rowvec ww(w);
1680     for(size_t ip=0;ip<grid.size();ip++) {
1681       // Screen for bad behavior
1682       if(t(ip)>=thr)
1683 	ww(ip)*=-k/2*std::pow(tw(ip)/t(ip),k)*(arma::dot(Esi,orbdens.col(ip))/t(ip));
1684       else
1685 	ww(ip)=0.0;
1686     }
1687 
1688     increment_mgga_kin<double>(S,ww,bf_x,bf_y,bf_z);
1689   }
1690 
1691   // GGA part
1692   {
1693     // Multiply grad rho by the weights and the integrand
1694     for(size_t ip=0;ip<gr.n_rows;ip++)
1695       for(size_t ic=0;ic<gr.n_cols;ic++)
1696 	if(t(ip)>=thr)
1697 	  gr(ip,ic)*=w(ip)*k/4*std::pow(tw(ip)/t(ip),k-1)*arma::dot(Esi,orbdens.col(ip))/(arma::sum(orbdens.col(ip))*t(ip));
1698 	else
1699 	  gr(ip,ic)=0;
1700 
1701     increment_gga<double>(S,gr,bf,bf_x,bf_y,bf_z);
1702   }
1703 
1704   // Increment
1705   So.submat(bf_ind,bf_ind)+=S;
1706 }
1707 
eval_Fxc(arma::mat & Ho) const1708 void AngularGrid::eval_Fxc(arma::mat & Ho) const {
1709   if(polarized) {
1710     ERROR_INFO();
1711     throw std::runtime_error("Refusing to compute restricted Fock matrix with unrestricted density.\n");
1712   }
1713 
1714   // Screen quadrature points by small densities
1715   arma::uvec screen(screen_density());
1716   // No important grid points, return
1717   if(!screen.n_elem)
1718     return;
1719 
1720   // Work matrix
1721   arma::mat H(bf_ind.n_elem,bf_ind.n_elem);
1722   H.zeros();
1723 
1724   {
1725     // LDA potential
1726     arma::rowvec vrho(vxc.row(0));
1727     // Multiply weights into potential
1728     vrho%=w;
1729     // Increment matrix
1730     increment_lda<double>(H,vrho,bf,screen);
1731   }
1732 
1733   if(do_gga) {
1734     // Get vsigma
1735     arma::rowvec vs(vsigma.row(0));
1736     // Get grad rho
1737     arma::uvec idx(arma::linspace<arma::uvec>(0,2,3));
1738     arma::mat gr(arma::trans(grho.rows(idx)));
1739     // Multiply grad rho by vsigma and the weights
1740     for(size_t i=0;i<gr.n_rows;i++)
1741       for(size_t ic=0;ic<gr.n_cols;ic++)
1742 	gr(i,ic)=2.0*w(i)*vs(i)*gr(i,ic);
1743     // Increment matrix
1744     increment_gga<double>(H,gr,bf,bf_x,bf_y,bf_z,screen);
1745   }
1746 
1747   if(do_mgga_t && do_mgga_l) {
1748     // Get vtau and vlapl
1749     arma::rowvec vt(vtau.row(0));
1750     arma::rowvec vl(vlapl.row(0));
1751     // Scale both with weights
1752     vt%=w;
1753     vl%=w;
1754 
1755     // Evaluate kinetic contribution
1756     increment_mgga_kin<double>(H,0.5*vt + 2.0*vl,bf_x,bf_y,bf_z,screen);
1757 
1758     // Evaluate laplacian contribution. Get function laplacian
1759     increment_mgga_lapl<double>(H,vl,bf,bf_lapl,screen);
1760 
1761   } else if(do_mgga_t) {
1762     arma::rowvec vt(vtau.row(0));
1763     vt%=w;
1764     increment_mgga_kin<double>(H,0.5*vt,bf_x,bf_y,bf_z,screen);
1765 
1766   } else if(do_mgga_l) {
1767     arma::rowvec vl(vlapl.row(0));
1768     vl%=w;
1769     increment_mgga_kin<double>(H,2.0*vl,bf_x,bf_y,bf_z,screen);
1770     increment_mgga_lapl<double>(H,vl,bf,bf_lapl,screen);
1771   }
1772 
1773   Ho(bf_ind,bf_ind)+=H;
1774 }
1775 
eval_diag_Fxc(arma::vec & H) const1776 void AngularGrid::eval_diag_Fxc(arma::vec & H) const {
1777   if(polarized) {
1778     ERROR_INFO();
1779     throw std::runtime_error("Refusing to compute restricted Fock matrix with unrestricted density.\n");
1780   }
1781 
1782   // Initialize memory
1783   H.zeros(pot_bf_ind.n_elem);
1784 
1785   // Screen quadrature points by small densities
1786   arma::uvec screen(screen_density());
1787   // No important grid points, return
1788   if(!screen.n_elem)
1789     return;
1790 
1791   {
1792     // LDA potential
1793     arma::rowvec vrho(vxc.row(0));
1794     // Multiply weights into potential
1795     vrho%=w;
1796     // Increment matrix
1797     for(size_t iip=0;iip<screen.n_elem;iip++) {
1798       size_t ip(screen(iip));
1799       for(size_t j=0;j<bf.n_rows;j++)
1800 	H(bf_potind(j))+=vrho(ip)*bf(j,ip)*bf(j,ip);
1801     }
1802   }
1803 
1804   if(do_gga) {
1805     // Get vsigma
1806     arma::rowvec vs(vsigma.row(0));
1807     // Get grad rho
1808     arma::uvec idx(arma::linspace<arma::uvec>(0,2,3));
1809     arma::mat gr(arma::trans(grho.rows(idx)));
1810     // Multiply grad rho by vsigma and the weights
1811     for(size_t i=0;i<gr.n_rows;i++)
1812       for(size_t ic=0;ic<gr.n_cols;ic++)
1813 	gr(i,ic)=2.0*w(i)*vs(i)*gr(i,ic);
1814     // Increment matrix
1815     for(size_t iip=0;iip<screen.n_elem;iip++) {
1816       size_t ip(screen(iip));
1817       for(size_t j=0;j<bf.n_rows;j++)
1818 	H(bf_potind(j))+=2.0 * (gr(ip,0)*bf_x(j,ip) + gr(ip,1)*bf_y(j,ip) + gr(ip,2)*bf_z(j,ip)) * bf(j,ip);
1819     }
1820 
1821     if(do_mgga_t && do_mgga_l) {
1822       // Get vtau and vlapl
1823       arma::rowvec vt(vtau.row(0));
1824       arma::rowvec vl(vlapl.row(0));
1825       // Scale both with weights
1826       vt%=w;
1827       vl%=w;
1828 
1829       // Evaluate kinetic contribution
1830       for(size_t iip=0;iip<screen.n_elem;iip++) {
1831 	size_t ip(screen(iip));
1832 	for(size_t j=0;j<bf.n_rows;j++)
1833 	  H(bf_potind(j))+=(0.5*vt(ip)+2.0*vl(ip))*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
1834       }
1835 
1836       // Evaluate laplacian contribution.
1837       for(size_t iip=0;iip<screen.n_elem;iip++) {
1838 	size_t ip(screen(iip));
1839 	for(size_t j=0;j<bf.n_rows;j++)
1840 	  H(bf_potind(j))+=2.0*vl(ip)*bf(j,ip)*bf_lapl(j,ip);
1841       }
1842 
1843     } else if(do_mgga_t) {
1844       arma::rowvec vt(vtau.row(0));
1845       vt%=w;
1846 
1847       // Evaluate kinetic contribution
1848       for(size_t iip=0;iip<screen.n_elem;iip++) {
1849 	size_t ip(screen(iip));
1850 	for(size_t j=0;j<bf.n_rows;j++)
1851 	  H(bf_potind(j))+=0.5*vt(ip)*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
1852       }
1853 
1854     } else if(do_mgga_l) {
1855       arma::rowvec vl(vlapl.row(0));
1856       vl%=w;
1857 
1858       // Evaluate kinetic contribution
1859       for(size_t iip=0;iip<screen.n_elem;iip++) {
1860 	size_t ip(screen(iip));
1861 	for(size_t j=0;j<bf.n_rows;j++)
1862 	  H(bf_potind(j))+=2.0*vl(ip)*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
1863       }
1864 
1865       // Evaluate laplacian contribution.
1866       for(size_t iip=0;iip<screen.n_elem;iip++) {
1867 	size_t ip(screen(iip));
1868 	for(size_t j=0;j<bf.n_rows;j++)
1869 	  H(bf_potind(j))+=2.0*vl(ip)*bf(j,ip)*bf_lapl(j,ip);
1870       }
1871     }
1872   }
1873 }
1874 
eval_Fxc(arma::mat & Hao,arma::mat & Hbo,bool beta) const1875 void AngularGrid::eval_Fxc(arma::mat & Hao, arma::mat & Hbo, bool beta) const {
1876   if(!polarized) {
1877     ERROR_INFO();
1878     throw std::runtime_error("Refusing to compute unrestricted Fock matrix with restricted density.\n");
1879   }
1880 
1881   // Screen quadrature points by small densities
1882   arma::uvec screen(screen_density());
1883   // No important grid points, return
1884   if(!screen.n_elem)
1885     return;
1886 
1887   arma::mat Ha, Hb;
1888   Ha.zeros(bf_ind.n_elem,bf_ind.n_elem);
1889   if(beta)
1890     Hb.zeros(bf_ind.n_elem,bf_ind.n_elem);
1891 
1892   {
1893     // LDA potential
1894     arma::rowvec vrhoa(vxc.row(0));
1895     // Multiply weights into potential
1896     vrhoa%=w;
1897     // Increment matrix
1898     increment_lda<double>(Ha,vrhoa,bf,screen);
1899 
1900     if(beta) {
1901       arma::rowvec vrhob(vxc.row(1));
1902       vrhob%=w;
1903       increment_lda<double>(Hb,vrhob,bf,screen);
1904     }
1905   }
1906   if(Ha.has_nan() || (beta && Hb.has_nan()))
1907     throw std::logic_error("NaN encountered!\n");
1908 
1909   if(do_gga) {
1910     // Get vsigma
1911     arma::rowvec vs_aa(vsigma.row(0));
1912     arma::rowvec vs_ab(vsigma.row(1));
1913 
1914     // Get grad rho
1915     arma::uvec idxa(arma::linspace<arma::uvec>(0,2,3));
1916     arma::uvec idxb(arma::linspace<arma::uvec>(3,5,3));
1917     arma::mat gr_a0(arma::trans(grho.rows(idxa)));
1918     arma::mat gr_b0(arma::trans(grho.rows(idxb)));
1919 
1920     // Multiply grad rho by vsigma and the weights
1921     arma::mat gr_a(gr_a0);
1922     for(size_t i=0;i<gr_a.n_rows;i++)
1923       for(size_t ic=0;ic<gr_a.n_cols;ic++)
1924 	gr_a(i,ic)=w(i)*(2.0*vs_aa(i)*gr_a0(i,ic) + vs_ab(i)*gr_b0(i,ic));
1925     // Increment matrix
1926     increment_gga<double>(Ha,gr_a,bf,bf_x,bf_y,bf_z,screen);
1927 
1928     if(beta) {
1929       arma::rowvec vs_bb(vsigma.row(2));
1930       arma::mat gr_b(gr_b0);
1931       for(size_t i=0;i<gr_b.n_rows;i++)
1932 	for(size_t ic=0;ic<gr_b.n_cols;ic++)
1933 	  gr_b(i,ic)=w(i)*(2.0*vs_bb(i)*gr_b0(i,ic) + vs_ab(i)*gr_a0(i,ic));
1934       increment_gga<double>(Hb,gr_b,bf,bf_x,bf_y,bf_z,screen);
1935     }
1936   }
1937 
1938   if(do_mgga_t && do_mgga_l) {
1939     // Get vtau and vlapl
1940     arma::rowvec vt_a(vtau.row(0));
1941     arma::rowvec vl_a(vlapl.row(0));
1942 
1943     // Scale both with weights
1944     vt_a%=w;
1945     vl_a%=w;
1946 
1947     // Evaluate kinetic contribution
1948     increment_mgga_kin<double>(Ha,0.5*vt_a + 2.0*vl_a,bf_x,bf_y,bf_z,screen);
1949 
1950     // Evaluate laplacian contribution. Get function laplacian
1951     increment_mgga_lapl<double>(Ha,vl_a,bf,bf_lapl,screen);
1952 
1953     if(beta) {
1954       arma::rowvec vt_b(vtau.row(1));
1955       arma::rowvec vl_b(vlapl.row(1));
1956       vt_b%=w;
1957       vl_b%=w;
1958       increment_mgga_kin<double>(Hb,0.5*vt_b + 2.0*vl_b,bf_x,bf_y,bf_z,screen);
1959       increment_mgga_lapl<double>(Hb,vl_b,bf,bf_lapl,screen);
1960     }
1961   } else if(do_mgga_t) {
1962     arma::rowvec vt_a(vtau.row(0));
1963     vt_a%=w;
1964     increment_mgga_kin<double>(Ha,0.5*vt_a,bf_x,bf_y,bf_z,screen);
1965     if(beta) {
1966       arma::rowvec vt_b(vtau.row(1));
1967       vt_b%=w;
1968       increment_mgga_kin<double>(Hb,0.5*vt_b,bf_x,bf_y,bf_z,screen);
1969     }
1970   } else if(do_mgga_l) {
1971     arma::rowvec vl_a(vlapl.row(0));
1972     vl_a%=w;
1973     increment_mgga_kin<double>(Ha,2.0*vl_a,bf_x,bf_y,bf_z,screen);
1974     increment_mgga_lapl<double>(Ha,vl_a,bf,bf_lapl,screen);
1975 
1976     if(beta) {
1977       arma::rowvec vl_b(vlapl.row(1));
1978       vl_b%=w;
1979       increment_mgga_kin<double>(Hb,2.0*vl_b,bf_x,bf_y,bf_z,screen);
1980       increment_mgga_lapl<double>(Hb,vl_b,bf,bf_lapl,screen);
1981     }
1982   }
1983 
1984   Hao(bf_ind,bf_ind)+=Ha;
1985   if(beta)
1986     Hbo(bf_ind,bf_ind)+=Hb;
1987 }
1988 
eval_diag_Fxc(arma::vec & Ha,arma::vec & Hb) const1989 void AngularGrid::eval_diag_Fxc(arma::vec & Ha, arma::vec & Hb) const {
1990   if(!polarized) {
1991     ERROR_INFO();
1992     throw std::runtime_error("Refusing to compute unrestricted Fock matrix with restricted density.\n");
1993   }
1994 
1995   // Initialize memory
1996   Ha.zeros(pot_bf_ind.n_elem);
1997   Hb.zeros(pot_bf_ind.n_elem);
1998 
1999   // Screen quadrature points by small densities
2000   arma::uvec screen(screen_density());
2001   // No important grid points, return
2002   if(!screen.n_elem)
2003     return;
2004 
2005   {
2006     // LDA potential
2007     arma::rowvec vrhoa(vxc.row(0));
2008     // Multiply weights into potential
2009     vrhoa%=w;
2010     arma::rowvec vrhob(vxc.row(1));
2011     vrhob%=w;
2012     // Increment matrix
2013     for(size_t iip=0;iip<screen.n_elem;iip++) {
2014       size_t ip(screen(iip));
2015       for(size_t j=0;j<bf.n_rows;j++) {
2016 	Ha(bf_potind(j))+=vrhoa(ip)*bf(j,ip)*bf(j,ip);
2017 	Hb(bf_potind(j))+=vrhob(ip)*bf(j,ip)*bf(j,ip);
2018       }
2019     }
2020   }
2021 
2022   if(do_gga) {
2023     // Get vsigma
2024     arma::rowvec vs_aa(vsigma.row(0));
2025     arma::rowvec vs_ab(vsigma.row(1));
2026     arma::rowvec vs_bb(vsigma.row(2));
2027     // Get grad rho
2028     arma::uvec idxa(arma::linspace<arma::uvec>(0,2,3));
2029     arma::uvec idxb(arma::linspace<arma::uvec>(3,5,3));
2030     arma::mat gra0(arma::trans(grho.rows(idxa)));
2031     arma::mat grb0(arma::trans(grho.rows(idxb)));
2032 
2033     // Multiply grad rho by vsigma and the weights
2034     arma::mat gra(gra0);
2035     for(size_t i=0;i<gra.n_rows;i++)
2036       for(size_t ic=0;ic<gra.n_cols;ic++)
2037 	gra(i,ic)=w(i)*(2.0*vs_aa(i)*gra0(i,ic)+vs_ab(i)*grb0(i,ic));
2038     // Increment matrix
2039     for(size_t iip=0;iip<screen.n_elem;iip++) {
2040       size_t ip(screen(iip));
2041       for(size_t j=0;j<bf.n_rows;j++)
2042 	Ha(bf_potind(j))+=2.0 * (gra(ip,0)*bf_x(j,ip) + gra(ip,1)*bf_y(j,ip) + gra(ip,2)*bf_z(j,ip)) * bf(j,ip);
2043     }
2044 
2045     arma::mat grb(grb0);
2046     for(size_t i=0;i<grb.n_rows;i++)
2047       for(size_t ic=0;ic<grb.n_cols;ic++)
2048 	grb(i,ic)=w(i)*(2.0*vs_bb(i)*grb0(i,ic)+vs_ab(i)*gra0(i,ic));
2049     for(size_t iip=0;iip<screen.n_elem;iip++) {
2050       size_t ip(screen(iip));
2051       for(size_t j=0;j<bf.n_rows;j++)
2052 	Hb(bf_potind(j))+=2.0 * (grb(ip,0)*bf_x(j,ip) + grb(ip,1)*bf_y(j,ip) + grb(ip,2)*bf_z(j,ip)) * bf(j,ip);
2053     }
2054 
2055     if(do_mgga_t && do_mgga_l) {
2056       // Get vtau and vlapl
2057       arma::rowvec vta(vtau.row(0));
2058       arma::rowvec vla(vlapl.row(0));
2059       arma::rowvec vtb(vtau.row(1));
2060       arma::rowvec vlb(vlapl.row(1));
2061       // Scale both with weights
2062       vta%=w;
2063       vla%=w;
2064       vtb%=w;
2065       vlb%=w;
2066 
2067       // Evaluate kinetic contribution
2068       for(size_t iip=0;iip<screen.n_elem;iip++) {
2069 	size_t ip(screen(iip));
2070 	for(size_t j=0;j<bf.n_rows;j++) {
2071 	  Ha(bf_potind(j))+=(0.5*vta(ip)+2.0*vla(ip))*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
2072 	  Hb(bf_potind(j))+=(0.5*vtb(ip)+2.0*vlb(ip))*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
2073 	}
2074       }
2075 
2076       // Evaluate laplacian contribution.
2077       for(size_t iip=0;iip<screen.n_elem;iip++) {
2078 	size_t ip(screen(iip));
2079 	for(size_t j=0;j<bf.n_rows;j++) {
2080 	  Ha(bf_potind(j))+=2.0*vla(ip)*bf(j,ip)*bf_lapl(j,ip);
2081 	  Hb(bf_potind(j))+=2.0*vlb(ip)*bf(j,ip)*bf_lapl(j,ip);
2082 	}
2083       }
2084 
2085     } else if(do_mgga_t) {
2086       arma::rowvec vta(vtau.row(0));
2087       arma::rowvec vtb(vtau.row(1));
2088       vta%=w;
2089       vtb%=w;
2090 
2091       // Evaluate kinetic contribution
2092       for(size_t iip=0;iip<screen.n_elem;iip++) {
2093 	size_t ip(screen(iip));
2094 	for(size_t j=0;j<bf.n_rows;j++) {
2095 	  Ha(bf_potind(j))+=0.5*vta(ip)*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
2096 	  Hb(bf_potind(j))+=0.5*vtb(ip)*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
2097 	}
2098       }
2099 
2100     } else if(do_mgga_l) {
2101       arma::rowvec vla(vlapl.row(0));
2102       arma::rowvec vlb(vlapl.row(1));
2103       vla%=w;
2104       vlb%=w;
2105 
2106       // Evaluate kinetic contribution
2107       for(size_t iip=0;iip<screen.n_elem;iip++) {
2108 	size_t ip(screen(iip));
2109 	for(size_t j=0;j<bf.n_rows;j++) {
2110 	  Ha(bf_potind(j))+=2.0*vla(ip)*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
2111 	  Hb(bf_potind(j))+=2.0*vlb(ip)*(bf_x(j,ip)*bf_x(j,ip) + bf_y(j,ip)*bf_y(j,ip) + bf_z(j,ip)*bf_z(j,ip));
2112 	}
2113       }
2114 
2115       // Evaluate laplacian contribution.
2116       for(size_t iip=0;iip<screen.n_elem;iip++) {
2117 	size_t ip(screen(iip));
2118 	for(size_t j=0;j<bf.n_rows;j++) {
2119 	  Ha(bf_potind(j))+=2.0*vla(ip)*bf(j,ip)*bf_lapl(j,ip);
2120 	  Hb(bf_potind(j))+=2.0*vlb(ip)*bf(j,ip)*bf_lapl(j,ip);
2121 	}
2122       }
2123     }
2124   }
2125 }
2126 
eval_force_u() const2127 arma::vec AngularGrid::eval_force_u() const {
2128   if(!polarized) {
2129     ERROR_INFO();
2130     throw std::runtime_error("Refusing to compute unrestricted force with restricted density.\n");
2131   }
2132 
2133   // Initialize force
2134   arma::vec f(3*basp->get_Nnuc());
2135   f.zeros();
2136 
2137   // Screen quadrature points by small densities
2138   arma::uvec screen(screen_density());
2139   // No important grid points, return
2140   if(!screen.n_elem)
2141     return f;
2142 
2143   // Loop over nuclei
2144   for(size_t inuc=0;inuc<basp->get_Nnuc();inuc++) {
2145     // Grad rho in grid points wrt functions centered on nucleus
2146     arma::mat gradrhoa(3,grid.size());
2147     gradrhoa.zeros();
2148     arma::mat gradrhob(3,grid.size());
2149     gradrhob.zeros();
2150     for(size_t iish=0;iish<shells.size();iish++)
2151       if(basp->get_shell_center_ind(shells[iish])==inuc) {
2152 	// Increment grad rho.
2153 	for(size_t iip=0;iip<screen.n_elem;iip++) {
2154 	  size_t ip(screen(iip));
2155 	  // Loop over functions on shell
2156 	  for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2157 	    gradrhoa(0,ip)+=bf_x(mu,ip)*Pav(mu,ip);
2158 	    gradrhoa(1,ip)+=bf_y(mu,ip)*Pav(mu,ip);
2159 	    gradrhoa(2,ip)+=bf_z(mu,ip)*Pav(mu,ip);
2160 
2161 	    gradrhob(0,ip)+=bf_x(mu,ip)*Pbv(mu,ip);
2162 	    gradrhob(1,ip)+=bf_y(mu,ip)*Pbv(mu,ip);
2163 	    gradrhob(2,ip)+=bf_z(mu,ip)*Pbv(mu,ip);
2164 	  }
2165 	}
2166       }
2167 
2168     // LDA potential
2169     arma::rowvec vrhoa(vxc.row(0));
2170     arma::rowvec vrhob(vxc.row(1));
2171     // Multiply weights into potential
2172     vrhoa%=w;
2173     vrhob%=w;
2174 
2175     // Force is
2176     f.subvec(3*inuc,3*inuc+2) += 2.0 * (gradrhoa*arma::trans(vrhoa) + gradrhob*arma::trans(vrhob));
2177 
2178     if(do_gga) {
2179       // Calculate X = 2 \sum_{u'v} P(uv) [ x(v) d_ij x(u) + (d_i x(u)) (d_j x(v)) ]
2180       //             = 2 \sum_u' Pv(u) d_ij x(u) + 2 \sum Pv_i(v) d_j x(v)
2181       arma::mat Xa(9,grid.size());
2182       Xa.zeros();
2183       arma::mat Xb(9,grid.size());
2184       Xb.zeros();
2185 
2186       for(size_t iish=0;iish<shells.size();iish++)
2187 	if(basp->get_shell_center_ind(shells[iish])==inuc) {
2188 	  // First contribution
2189 	  for(size_t iip=0;iip<screen.n_elem;iip++) {
2190 	    size_t ip(screen(iip));
2191 	    for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2192 	      for(int c=0;c<9;c++) {
2193 		Xa(c,ip)+=bf_hess(9*mu+c,ip)*Pav(mu,ip);
2194 		Xb(c,ip)+=bf_hess(9*mu+c,ip)*Pbv(mu,ip);
2195 	      }
2196 	    }
2197 	  }
2198 	  // Second contribution
2199 	  for(size_t iip=0;iip<screen.n_elem;iip++) {
2200 	    size_t ip(screen(iip));
2201 	    for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2202 	      // X is stored in column order: xx, yx, zx, xy, yy, zy, xz, yz, zz; but it's symmetric
2203 	      Xa(0,ip)+=Pav_x(mu,ip)*bf_x(mu,ip);
2204 	      Xa(1,ip)+=Pav_x(mu,ip)*bf_y(mu,ip);
2205 	      Xa(2,ip)+=Pav_x(mu,ip)*bf_z(mu,ip);
2206 
2207 	      Xa(3,ip)+=Pav_y(mu,ip)*bf_x(mu,ip);
2208 	      Xa(4,ip)+=Pav_y(mu,ip)*bf_y(mu,ip);
2209 	      Xa(5,ip)+=Pav_y(mu,ip)*bf_z(mu,ip);
2210 
2211 	      Xa(6,ip)+=Pav_z(mu,ip)*bf_x(mu,ip);
2212 	      Xa(7,ip)+=Pav_z(mu,ip)*bf_y(mu,ip);
2213 	      Xa(8,ip)+=Pav_z(mu,ip)*bf_z(mu,ip);
2214 
2215 	      Xb(0,ip)+=Pbv_x(mu,ip)*bf_x(mu,ip);
2216 	      Xb(1,ip)+=Pbv_x(mu,ip)*bf_y(mu,ip);
2217 	      Xb(2,ip)+=Pbv_x(mu,ip)*bf_z(mu,ip);
2218 
2219 	      Xb(3,ip)+=Pbv_y(mu,ip)*bf_x(mu,ip);
2220 	      Xb(4,ip)+=Pbv_y(mu,ip)*bf_y(mu,ip);
2221 	      Xb(5,ip)+=Pbv_y(mu,ip)*bf_z(mu,ip);
2222 
2223 	      Xb(6,ip)+=Pbv_z(mu,ip)*bf_x(mu,ip);
2224 	      Xb(7,ip)+=Pbv_z(mu,ip)*bf_y(mu,ip);
2225 	      Xb(8,ip)+=Pbv_z(mu,ip)*bf_z(mu,ip);
2226 	    }
2227 	  }
2228 	}
2229       // Plug in factor
2230       Xa*=2.0;
2231       Xb*=2.0;
2232 
2233       // Get potential
2234       arma::rowvec vs_aa(vsigma.row(0));
2235       arma::rowvec vs_ab(vsigma.row(1));
2236       arma::rowvec vs_bb(vsigma.row(2));
2237       // Get grad rho
2238       arma::uvec idxa(arma::linspace<arma::uvec>(0,2,3));
2239       arma::uvec idxb(arma::linspace<arma::uvec>(3,5,3));
2240       arma::mat gr_a0(arma::trans(grho.rows(idxa)));
2241       arma::mat gr_b0(arma::trans(grho.rows(idxb)));
2242       // Multiply grad rho by vsigma and the weights
2243       arma::mat gr_a(gr_a0);
2244       arma::mat gr_b(gr_b0);
2245       for(size_t i=0;i<gr_a0.n_rows;i++)
2246 	for(size_t ic=0;ic<gr_a0.n_cols;ic++) {
2247 	  gr_a(i,ic)=w(i)*(2.0*vs_aa(i)*gr_a0(i,ic) + vs_ab(i)*gr_b0(i,ic));
2248 	  gr_b(i,ic)=w(i)*(2.0*vs_bb(i)*gr_b0(i,ic) + vs_ab(i)*gr_a0(i,ic));
2249 	}
2250 
2251       // f_x <- X_xx * g_x + X_xy * g_y + X_xz * g_z
2252       f(3*inuc  )+=arma::as_scalar(Xa.row(0)*gr_a.col(0) + Xa.row(3)*gr_a.col(1) + Xa.row(6)*gr_a.col(2));
2253       f(3*inuc  )+=arma::as_scalar(Xb.row(0)*gr_b.col(0) + Xb.row(3)*gr_b.col(1) + Xb.row(6)*gr_b.col(2));
2254       // f_y <- X_yx * g_x + X_yy * g_y + X_yz * g_z
2255       f(3*inuc+1)+=arma::as_scalar(Xa.row(1)*gr_a.col(0) + Xa.row(4)*gr_a.col(1) + Xa.row(7)*gr_a.col(2));
2256       f(3*inuc+1)+=arma::as_scalar(Xb.row(1)*gr_b.col(0) + Xb.row(4)*gr_b.col(1) + Xb.row(7)*gr_b.col(2));
2257       // f_z <- X_zx * g_x + X_zy * g_y + X_zz * g_z
2258       f(3*inuc+2)+=arma::as_scalar(Xa.row(2)*gr_a.col(0) + Xa.row(5)*gr_a.col(1) + Xa.row(8)*gr_a.col(2));
2259       f(3*inuc+2)+=arma::as_scalar(Xb.row(2)*gr_b.col(0) + Xb.row(5)*gr_b.col(1) + Xb.row(8)*gr_b.col(2));
2260 
2261       if(do_mgga_t || do_mgga_l) {
2262 	// Kinetic energy and Laplacian terms
2263 
2264 	// Y = P_uv (d_i d_j x(u)) d_j x(v)
2265 	arma::mat Ya(3,grid.size());
2266 	Ya.zeros();
2267 	arma::mat Yb(3,grid.size());
2268 	Yb.zeros();
2269 	for(size_t iish=0;iish<shells.size();iish++)
2270 	  if(basp->get_shell_center_ind(shells[iish])==inuc) {
2271 	    for(size_t iip=0;iip<screen.n_elem;iip++) {
2272 	      size_t ip(screen(iip));
2273 	      for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2274 		// Y_x =  H_xx g_x + H_xy g_y + H_xz g_z
2275 		Ya(0,ip) += bf_hess(9*mu  ,ip) * Pav_x(mu,ip) + bf_hess(9*mu+3,ip) * Pav_y(mu,ip) + bf_hess(9*mu+6,ip) * Pav_z(mu,ip);
2276 		Ya(1,ip) += bf_hess(9*mu+1,ip) * Pav_x(mu,ip) + bf_hess(9*mu+4,ip) * Pav_y(mu,ip) + bf_hess(9*mu+7,ip) * Pav_z(mu,ip);
2277 		Ya(2,ip) += bf_hess(9*mu+2,ip) * Pav_x(mu,ip) + bf_hess(9*mu+5,ip) * Pav_y(mu,ip) + bf_hess(9*mu+8,ip) * Pav_z(mu,ip);
2278 
2279 		Yb(0,ip) += bf_hess(9*mu  ,ip) * Pbv_x(mu,ip) + bf_hess(9*mu+3,ip) * Pbv_y(mu,ip) + bf_hess(9*mu+6,ip) * Pbv_z(mu,ip);
2280 		Yb(1,ip) += bf_hess(9*mu+1,ip) * Pbv_x(mu,ip) + bf_hess(9*mu+4,ip) * Pbv_y(mu,ip) + bf_hess(9*mu+7,ip) * Pbv_z(mu,ip);
2281 		Yb(2,ip) += bf_hess(9*mu+2,ip) * Pbv_x(mu,ip) + bf_hess(9*mu+5,ip) * Pbv_y(mu,ip) + bf_hess(9*mu+8,ip) * Pbv_z(mu,ip);
2282 	      }
2283 	    }
2284 	  }
2285 
2286 	// Z = 2 P_uv (lapl x_v d_i x_u + x_v lapl (d_i x_u))
2287 	arma::mat Za, Zb;
2288 	if(do_mgga_l) {
2289 	  Za.zeros(3,grid.size());
2290 	  Zb.zeros(3,grid.size());
2291 	  for(size_t iish=0;iish<shells.size();iish++)
2292 	    if(basp->get_shell_center_ind(shells[iish])==inuc) {
2293 	      for(size_t iip=0;iip<screen.n_elem;iip++) {
2294 		size_t ip(screen(iip));
2295 		for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2296 		  // Z_x =
2297 		  Za(0,ip) += bf_lapl(mu,ip)*Pav_x(mu,ip) + Pav(mu)*bf_lx(mu,ip);
2298 		  Za(1,ip) += bf_lapl(mu,ip)*Pav_y(mu,ip) + Pav(mu)*bf_ly(mu,ip);
2299 		  Za(2,ip) += bf_lapl(mu,ip)*Pav_z(mu,ip) + Pav(mu)*bf_lz(mu,ip);
2300 
2301 		  Zb(0,ip) += bf_lapl(mu,ip)*Pbv_x(mu,ip) + Pbv(mu)*bf_lx(mu,ip);
2302 		  Zb(1,ip) += bf_lapl(mu,ip)*Pbv_y(mu,ip) + Pbv(mu)*bf_ly(mu,ip);
2303 		  Zb(2,ip) += bf_lapl(mu,ip)*Pbv_z(mu,ip) + Pbv(mu)*bf_lz(mu,ip);
2304 		}
2305 	      }
2306 	    }
2307 	  // Put in the factor 2
2308 	  Za*=2.0;
2309 	  Zb*=2.0;
2310 	}
2311 
2312 	if(do_mgga_t && do_mgga_l) {
2313 	  // Get vtau and vlapl
2314 	  arma::rowvec vt_a(vtau.row(0));
2315 	  arma::rowvec vt_b(vtau.row(1));
2316 	  arma::rowvec vl_a(vlapl.row(0));
2317 	  arma::rowvec vl_b(vlapl.row(1));
2318 	  // Scale both with weights
2319 	  vt_a%=w;
2320 	  vt_b%=w;
2321 	  vl_a%=w;
2322 	  vl_b%=w;
2323 
2324 	  // Increment force
2325 	  f.subvec(3*inuc, 3*inuc+2) += Ya*arma::trans(vt_a+2*vl_a) + Za*arma::trans(vl_a);
2326 	  f.subvec(3*inuc, 3*inuc+2) += Yb*arma::trans(vt_b+2*vl_b) + Zb*arma::trans(vl_b);
2327 
2328 	} else if(do_mgga_t) {
2329 	  arma::rowvec vt_a(vtau.row(0));
2330 	  arma::rowvec vt_b(vtau.row(1));
2331 	  vt_a%=w;
2332 	  vt_b%=w;
2333 
2334 	  // Increment force
2335 	  f.subvec(3*inuc, 3*inuc+2) += Ya*arma::trans(vt_a);
2336 	  f.subvec(3*inuc, 3*inuc+2) += Yb*arma::trans(vt_b);
2337 
2338 	} else if(do_mgga_l) {
2339 	  arma::rowvec vl_a(vlapl.row(0));
2340 	  arma::rowvec vl_b(vlapl.row(1));
2341 	  vl_a%=w;
2342 	  vl_b%=w;
2343 	  f.subvec(3*inuc, 3*inuc+2) += Ya*arma::trans(2*vl_a) + Za*arma::trans(vl_a);
2344 	  f.subvec(3*inuc, 3*inuc+2) += Yb*arma::trans(2*vl_b) + Zb*arma::trans(vl_b);
2345 	}
2346       }
2347     }
2348   }
2349 
2350   return f;
2351 
2352 }
2353 
eval_force_r() const2354 arma::vec AngularGrid::eval_force_r() const {
2355   if(polarized) {
2356     ERROR_INFO();
2357     throw std::runtime_error("Refusing to compute restricted force with unrestricted density.\n");
2358   }
2359 
2360   // Initialize force
2361   arma::vec f(3*basp->get_Nnuc());
2362   f.zeros();
2363 
2364   // Screen quadrature points by small densities
2365   arma::uvec screen(screen_density());
2366   // No important grid points, return
2367   if(!screen.n_elem)
2368     return f;
2369 
2370   // Loop over nuclei
2371   for(size_t inuc=0;inuc<basp->get_Nnuc();inuc++) {
2372     // Grad rho in grid points wrt functions centered on nucleus
2373     arma::mat gradrho(3,grid.size());
2374     gradrho.zeros();
2375     for(size_t iish=0;iish<shells.size();iish++)
2376       if(basp->get_shell_center_ind(shells[iish])==inuc) {
2377 	// Increment grad rho.
2378 	for(size_t iip=0;iip<screen.n_elem;iip++) {
2379 	  size_t ip(screen(iip));
2380 	  // Loop over functions on shell
2381 	  for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2382 	    gradrho(0,ip)+=bf_x(mu,ip)*Pv(mu,ip);
2383 	    gradrho(1,ip)+=bf_y(mu,ip)*Pv(mu,ip);
2384 	    gradrho(2,ip)+=bf_z(mu,ip)*Pv(mu,ip);
2385 	  }
2386 	}
2387       }
2388 
2389     // LDA potential
2390     arma::rowvec vrho(vxc.row(0));
2391     // Multiply weights into potential
2392     vrho%=w;
2393 
2394     // Force is
2395     f.subvec(3*inuc,3*inuc+2) += 2.0 * gradrho*arma::trans(vrho);
2396 
2397     if(do_gga) {
2398       // Calculate X = 2 \sum_{u'v} P(uv) [ x(v) d_ij x(u) + (d_i x(u)) (d_j x(v)) ]
2399       //             = 2 \sum_u' Pv(u) d_ij x(u) + 2 \sum Pv_i(v) d_j x(v)
2400       arma::mat X(9,grid.size());
2401       X.zeros();
2402 
2403       for(size_t iish=0;iish<shells.size();iish++)
2404 	if(basp->get_shell_center_ind(shells[iish])==inuc) {
2405 	  // First contribution
2406 	  for(size_t iip=0;iip<screen.n_elem;iip++) {
2407 	    size_t ip(screen(iip));
2408 	    for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2409 	      for(int c=0;c<9;c++) {
2410 		X(c,ip)+=bf_hess(9*mu+c,ip)*Pv(mu,ip);
2411 	      }
2412 	    }
2413 	  }
2414 	  // Second contribution
2415 	  for(size_t iip=0;iip<screen.n_elem;iip++) {
2416 	    size_t ip(screen(iip));
2417 	    for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2418 	      // X is stored in column order: xx, yx, zx, xy, yy, zy, xz, yz, zz; but it's symmetric
2419 	      X(0,ip)+=Pv_x(mu,ip)*bf_x(mu,ip);
2420 	      X(1,ip)+=Pv_x(mu,ip)*bf_y(mu,ip);
2421 	      X(2,ip)+=Pv_x(mu,ip)*bf_z(mu,ip);
2422 
2423 	      X(3,ip)+=Pv_y(mu,ip)*bf_x(mu,ip);
2424 	      X(4,ip)+=Pv_y(mu,ip)*bf_y(mu,ip);
2425 	      X(5,ip)+=Pv_y(mu,ip)*bf_z(mu,ip);
2426 
2427 	      X(6,ip)+=Pv_z(mu,ip)*bf_x(mu,ip);
2428 	      X(7,ip)+=Pv_z(mu,ip)*bf_y(mu,ip);
2429 	      X(8,ip)+=Pv_z(mu,ip)*bf_z(mu,ip);
2430 	    }
2431 	  }
2432 	}
2433       // Plug in factor
2434       X*=2.0;
2435 
2436       // Get potential
2437       arma::rowvec vs(vsigma.row(0));
2438       // Get grad rho
2439       arma::uvec idx(arma::linspace<arma::uvec>(0,2,3));
2440       arma::mat gr(arma::trans(grho.rows(idx)));
2441       // Multiply grad rho by vsigma and the weights
2442       for(size_t i=0;i<gr.n_rows;i++)
2443 	for(size_t ic=0;ic<gr.n_cols;ic++)
2444 	  gr(i,ic)=2.0*w(i)*vs(i)*gr(i,ic);
2445 
2446       // f_x <- X_xx * g_x + X_xy * g_y + X_xz * g_z
2447       f(3*inuc  )+=arma::as_scalar(X.row(0)*gr.col(0) + X.row(3)*gr.col(1) + X.row(6)*gr.col(2));
2448       // f_y <- X_yx * g_x + X_yy * g_y + X_yz * g_z
2449       f(3*inuc+1)+=arma::as_scalar(X.row(1)*gr.col(0) + X.row(4)*gr.col(1) + X.row(7)*gr.col(2));
2450       // f_z <- X_zx * g_x + X_zy * g_y + X_zz * g_z
2451       f(3*inuc+2)+=arma::as_scalar(X.row(2)*gr.col(0) + X.row(5)*gr.col(1) + X.row(8)*gr.col(2));
2452 
2453       if(do_mgga_t || do_mgga_l) {
2454 	// Kinetic energy and Laplacian terms
2455 
2456 	// Y = P_uv (d_i d_j x(u)) d_j x(v)
2457 	arma::mat Y(3,grid.size());
2458 	Y.zeros();
2459 	for(size_t iish=0;iish<shells.size();iish++)
2460 	  if(basp->get_shell_center_ind(shells[iish])==inuc) {
2461 	    for(size_t iip=0;iip<screen.n_elem;iip++) {
2462 	      size_t ip(screen(iip));
2463 	      for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2464 		// Y_x =  H_xx g_x + H_xy g_y + H_xz g_z
2465 		Y(0,ip) += bf_hess(9*mu  ,ip) * Pv_x(mu,ip) + bf_hess(9*mu+3,ip) * Pv_y(mu,ip) + bf_hess(9*mu+6,ip) * Pv_z(mu,ip);
2466 		Y(1,ip) += bf_hess(9*mu+1,ip) * Pv_x(mu,ip) + bf_hess(9*mu+4,ip) * Pv_y(mu,ip) + bf_hess(9*mu+7,ip) * Pv_z(mu,ip);
2467 		Y(2,ip) += bf_hess(9*mu+2,ip) * Pv_x(mu,ip) + bf_hess(9*mu+5,ip) * Pv_y(mu,ip) + bf_hess(9*mu+8,ip) * Pv_z(mu,ip);
2468 	      }
2469 	    }
2470 	  }
2471 
2472 	// Z = 2 P_uv (lapl x_v d_i x_u + x_v lapl (d_i x_u))
2473 	arma::mat Z;
2474 	if(do_mgga_l) {
2475 	  Z.zeros(3,grid.size());
2476 	  for(size_t iish=0;iish<shells.size();iish++)
2477 	    if(basp->get_shell_center_ind(shells[iish])==inuc) {
2478 	      for(size_t iip=0;iip<screen.n_elem;iip++) {
2479 		size_t ip(screen(iip));
2480 		for(size_t mu=bf_i0(iish);mu<bf_i0(iish)+bf_N(iish);mu++) {
2481 		  // Z_x =
2482 		  Z(0,ip) += bf_lapl(mu,ip)*Pv_x(mu,ip) + Pv(mu)*bf_lx(mu,ip);
2483 		  Z(1,ip) += bf_lapl(mu,ip)*Pv_y(mu,ip) + Pv(mu)*bf_ly(mu,ip);
2484 		  Z(2,ip) += bf_lapl(mu,ip)*Pv_z(mu,ip) + Pv(mu)*bf_lz(mu,ip);
2485 		}
2486 	      }
2487 	    }
2488 	  // Put in the factor 2
2489 	  Z*=2.0;
2490 	}
2491 
2492 	if(do_mgga_t && do_mgga_l) {
2493 	  // Get vtau and vlapl
2494 	  arma::rowvec vt(vtau.row(0));
2495 	  arma::rowvec vl(vlapl.row(0));
2496 	  // Scale both with weights
2497 	  vt%=w;
2498 	  vl%=w;
2499 
2500 	  // Increment force
2501 	  f.subvec(3*inuc, 3*inuc+2) += Y*arma::trans(vt+2*vl) + Z*arma::trans(vl);
2502 
2503 	} else if(do_mgga_t) {
2504 	  arma::rowvec vt(vtau.row(0));
2505 	  vt%=w;
2506 	  f.subvec(3*inuc, 3*inuc+2) += Y*arma::trans(vt);
2507 
2508 	} else if(do_mgga_l) {
2509 	  arma::rowvec vl(vlapl.row(0));
2510 	  vl%=w;
2511 	  f.subvec(3*inuc, 3*inuc+2) += Y*arma::trans(2*vl) + Z*arma::trans(vl);
2512 	}
2513       }
2514     }
2515   }
2516 
2517   return f;
2518 }
2519 
check_grad_tau_lapl(int x_func,int c_func)2520 void AngularGrid::check_grad_tau_lapl(int x_func, int c_func) {
2521   // Do we need gradients?
2522   do_grad=false;
2523   if(x_func>0)
2524     do_grad=do_grad || gradient_needed(x_func);
2525   if(c_func>0)
2526     do_grad=do_grad || gradient_needed(c_func);
2527 
2528   // Do we need laplacians?
2529   do_tau=false;
2530   if(x_func>0)
2531     do_tau=do_tau || tau_needed(x_func);
2532   if(c_func>0)
2533     do_tau=do_tau || tau_needed(c_func);
2534 
2535   // Do we need laplacians?
2536   do_lapl=false;
2537   if(x_func>0)
2538     do_lapl=do_lapl || laplacian_needed(x_func);
2539   if(c_func>0)
2540     do_lapl=do_lapl || laplacian_needed(c_func);
2541 }
2542 
get_grad_tau_lapl(bool & grad_,bool & tau_,bool & lap_) const2543 void AngularGrid::get_grad_tau_lapl(bool & grad_, bool & tau_, bool & lap_) const {
2544   grad_=do_grad;
2545   tau_=do_tau;
2546   lap_=do_lapl;
2547 }
2548 
set_grad_tau_lapl(bool grad_,bool tau_,bool lap_)2549 void AngularGrid::set_grad_tau_lapl(bool grad_, bool tau_, bool lap_) {
2550   do_grad=grad_;
2551   do_tau=tau_;
2552   do_lapl=lap_;
2553 }
2554 
set_hess_lgrad(bool hess,bool lgrad)2555 void AngularGrid::set_hess_lgrad(bool hess, bool lgrad) {
2556   do_hess=hess;
2557   do_lgrad=lgrad;
2558 }
2559 
2560 // Fixed size shell
construct()2561 angshell_t AngularGrid::construct() {
2562   // Form the grid.
2563   form_grid();
2564   // Return the updated info structure, holding the amount of grid
2565   // points and function values
2566   return info;
2567 }
2568 
next_grid()2569 void AngularGrid::next_grid() {
2570   if(use_lobatto)
2571     info.l+=2;
2572   else {
2573     // Need to determine what is next order of Lebedev
2574     // quadrature that is supported.
2575     info.l=next_lebedev(info.l);
2576   }
2577 }
2578 
construct(const arma::mat & P,double ftoler,int x_func,int c_func)2579 angshell_t AngularGrid::construct(const arma::mat & P, double ftoler, int x_func, int c_func) {
2580   // Construct a grid centered on (x0,y0,z0)
2581   // with nrad radial shells
2582   // See Köster et al for specifics.
2583 
2584   // Start with
2585   info.l=adaptive_l0();
2586 
2587   // Determine limit for angular quadrature
2588   int lmax=koster_lmax(ftoler);
2589 
2590   if(x_func == 0 && c_func == 0) {
2591     // No exchange or correlation!
2592     return info;
2593   }
2594 
2595   // Update shell list size
2596   form_grid();
2597   if(!pot_bf_ind.n_elem)
2598     // No points!
2599     return info;
2600 
2601   // Old and new diagonal elements of Hamiltonian
2602   arma::vec Hold, Hnew;
2603   // Compute density
2604   update_density(P);
2605   // Compute exchange and correlation.
2606   init_xc();
2607   // Compute the functionals
2608   if(x_func>0)
2609     compute_xc(x_func,true);
2610   if(c_func>0)
2611     compute_xc(c_func,true);
2612   // Clean up xc
2613   check_xc();
2614   // Construct the Fock matrix
2615   eval_diag_Fxc(Hold);
2616 
2617   // Maximum difference of diagonal elements of Hamiltonian
2618   double maxdiff;
2619 
2620   // Now, determine actual rule
2621   int l_enough=info.l;
2622   do {
2623     // Increment grid
2624     next_grid();
2625     // Form the grid using the current settings
2626     form_grid();
2627     // Compute density
2628     update_density(P);
2629     // Compute exchange and correlation.
2630     init_xc();
2631     // Compute the functionals
2632     if(x_func>0)
2633       compute_xc(x_func,true);
2634     if(c_func>0)
2635       compute_xc(c_func,true);
2636     // Clean up xc
2637     check_xc();
2638     // Construct the Fock matrix
2639     eval_diag_Fxc(Hnew);
2640 
2641     // Compute maximum difference of diagonal elements of Fock matrix
2642     maxdiff=arma::max(arma::abs(Hold-Hnew));
2643 
2644     // Switch contents
2645     std::swap(Hold,Hnew);
2646 
2647     // Increment order if tolerance not achieved.
2648     if(maxdiff>ftoler) {
2649       l_enough = info.l;
2650     }
2651   } while(maxdiff>ftoler && info.l<=lmax);
2652 
2653   // This is the value we vant
2654   info.l = l_enough;
2655 
2656   // Free memory
2657   free();
2658 
2659   return info;
2660 }
2661 
construct(const arma::mat & Pa,const arma::mat & Pb,double ftoler,int x_func,int c_func)2662 angshell_t AngularGrid::construct(const arma::mat & Pa, const arma::mat & Pb, double ftoler, int x_func, int c_func) {
2663   // Construct a grid centered on (x0,y0,z0)
2664   // with nrad radial shells
2665   // See Köster et al for specifics.
2666 
2667   // Start with
2668   info.l=adaptive_l0();
2669 
2670   if(x_func == 0 && c_func == 0) {
2671     // No exchange or correlation!
2672     return info;
2673   }
2674 
2675   // Update shell list size
2676   form_grid();
2677   if(!pot_bf_ind.n_elem)
2678     // No points!
2679     return info;
2680 
2681   // Old and new diagonal elements of Hamiltonian
2682   arma::vec Haold, Hanew, Hbold, Hbnew;
2683 
2684   // Compute density
2685   update_density(Pa,Pb);
2686 
2687   // Compute exchange and correlation.
2688   init_xc();
2689   // Compute the functionals
2690   if(x_func>0)
2691     compute_xc(x_func,true);
2692   if(c_func>0)
2693     compute_xc(c_func,true);
2694   // Clean up xc
2695   check_xc();
2696   // and construct the Fock matrices
2697   eval_diag_Fxc(Haold,Hbold);
2698 
2699   // Determine limit for angular quadrature
2700   int lmax=koster_lmax(ftoler);
2701 
2702   // Maximum difference of diagonal elements of Hamiltonian
2703   double maxdiff;
2704 
2705   // Now, determine actual quadrature limits
2706   int l_enough=info.l;
2707   do {
2708     // Increment grid
2709     next_grid();
2710     // Compute grid
2711     form_grid();
2712     // Compute density
2713     update_density(Pa,Pb);
2714 
2715     // Compute exchange and correlation.
2716     init_xc();
2717     // Compute the functionals
2718     if(x_func>0)
2719       compute_xc(x_func,true);
2720     if(c_func>0)
2721       compute_xc(c_func,true);
2722     // Clean up xc
2723     check_xc();
2724     // and construct the Fock matrices
2725     eval_diag_Fxc(Hanew,Hbnew);
2726 
2727     // Compute maximum difference of diagonal elements of Fock matrix
2728     maxdiff=std::max(arma::max(arma::abs(Haold-Hanew)),arma::max(arma::abs(Hbold-Hbnew)));
2729 
2730     // Copy contents
2731     std::swap(Haold,Hanew);
2732     std::swap(Hbold,Hbnew);
2733 
2734     // Increment order if tolerance not achieved.
2735     if(maxdiff>ftoler) {
2736       l_enough=info.l;
2737     }
2738   } while(maxdiff>ftoler && info.l<=lmax);
2739 
2740   // This is the value we vant
2741   info.l = l_enough;
2742 
2743   // Free memory
2744   free();
2745 
2746   return info;
2747 }
2748 
construct(const arma::cx_vec & C,double ftoler,int x_func,int c_func)2749 angshell_t AngularGrid::construct(const arma::cx_vec & C, double ftoler, int x_func, int c_func) {
2750   // Construct a grid centered on (x0,y0,z0)
2751   // with nrad radial shells
2752   // See Köster et al for specifics.
2753 
2754   // Start with
2755   info.l=adaptive_l0();
2756 
2757   if(x_func == 0 && c_func == 0) {
2758     // No exchange or correlation!
2759     return info;
2760   }
2761 
2762   // Update shell list size
2763   form_grid();
2764   if(!pot_bf_ind.n_elem)
2765     // No points!
2766     return info;
2767 
2768   // Determine limit for angular quadrature
2769   int lmax=koster_lmax(ftoler);
2770 
2771   // Old and new diagonal elements of Hamiltonian
2772   arma::vec Hold, Hnew, Hdum;
2773   // Compute density
2774   update_density(C);
2775 
2776   // Compute exchange and correlation.
2777   init_xc();
2778   // Compute the functionals
2779   if(x_func>0)
2780     compute_xc(x_func,true);
2781   if(c_func>0)
2782     compute_xc(c_func,true);
2783   // Clean up xc
2784   check_xc();
2785   // and construct the Fock matrices
2786   eval_diag_Fxc(Hold,Hdum);
2787 
2788   // Maximum difference of diagonal elements of Hamiltonian
2789   double maxdiff;
2790 
2791   // Now, determine actual quadrature limits
2792   int l_enough=info.l;
2793   do {
2794     // Increment grid
2795     next_grid();
2796     // Form the grid
2797     form_grid();
2798     // Compute density
2799     update_density(C);
2800 
2801     // Compute exchange and correlation.
2802     init_xc();
2803     // Compute the functionals
2804     if(x_func>0)
2805       compute_xc(x_func,true);
2806     if(c_func>0)
2807       compute_xc(c_func,true);
2808     // Clean up xc
2809     check_xc();
2810     // and construct the Fock matrices
2811     eval_diag_Fxc(Hnew,Hdum);
2812 
2813     // Compute maximum difference of diagonal elements of Fock matrix
2814     maxdiff=arma::max(arma::abs(Hold-Hnew));
2815 
2816     // Copy contents
2817     std::swap(Hold,Hnew);
2818 
2819     // Increment order if tolerance not achieved.
2820     if(maxdiff>ftoler) {
2821       l_enough = info.l;
2822     }
2823   } while(maxdiff>ftoler && info.l<=lmax);
2824 
2825   // This is the value we want
2826   info.l = l_enough;
2827 
2828   // Free memory once more
2829   free();
2830 
2831   return info;
2832 }
2833 
construct_becke(double otoler)2834 angshell_t AngularGrid::construct_becke(double otoler) {
2835   // Construct a grid centered on (x0,y0,z0)
2836   // with nrad radial shells
2837   // See Krack 1998 for details
2838 
2839   // Start with
2840   info.l=adaptive_l0();
2841 
2842   // Determine limit for angular quadrature
2843   int lmax=krack_lmax(otoler);
2844 
2845   // Update shell list size
2846   form_grid();
2847   if(!pot_bf_ind.n_elem)
2848     // No points!
2849     return info;
2850 
2851   // Old and new diagonal elements of overlap
2852   arma::vec Sold, Snew;
2853   // Initial value
2854   eval_diag_overlap(Sold);
2855 
2856   // Maximum difference of diagonal elements of Hamiltonian
2857   double maxdiff;
2858 
2859   // Now, determine actual quadrature limits
2860   int l_enough=info.l;
2861   do {
2862     // Increase quadrature
2863     next_grid();
2864     // Form grid
2865     form_grid();
2866 
2867     // Compute new overlap
2868     eval_diag_overlap(Snew);
2869 
2870     // Compute maximum difference of diagonal elements
2871     maxdiff=arma::max(arma::abs(Snew-Sold));
2872 
2873     // Copy contents
2874     std::swap(Snew,Sold);
2875 
2876     // Increment order if tolerance not achieved.
2877     if(maxdiff>otoler) {
2878       // Update value
2879       l_enough=info.l;
2880     }
2881   } while(maxdiff>otoler && info.l<=lmax);
2882 
2883   // This is the value we want
2884   info.l=l_enough;
2885 
2886   // Free memory once more
2887   free();
2888 
2889   return info;
2890 }
2891 
construct_hirshfeld(const Hirshfeld & hirsh,double otoler)2892 angshell_t AngularGrid::construct_hirshfeld(const Hirshfeld & hirsh, double otoler) {
2893   // Construct a grid centered on (x0,y0,z0)
2894   // with nrad radial shells
2895   // See Krack 1998 for details
2896 
2897   // Start with
2898   info.l=adaptive_l0();
2899 
2900   // Determine limit for angular quadrature
2901   int lmax=krack_lmax(otoler);
2902 
2903   // Old and new diagonal elements of overlap
2904   arma::vec Sold, Snew;
2905 
2906   // Form the grid
2907   form_hirshfeld_grid(hirsh);
2908   if(!pot_bf_ind.n_elem)
2909     // No points!
2910     return info;
2911 
2912   // Initial overlap
2913   eval_diag_overlap(Sold);
2914 
2915   // Maximum difference of diagonal elements of Hamiltonian
2916   double maxdiff;
2917   // Now, determine actual quadrature limits
2918   int l_enough=info.l;
2919   do {
2920     // Increment grid
2921     next_grid();
2922     // Form the grid
2923     form_hirshfeld_grid(hirsh);
2924 
2925     // Compute new overlap
2926     eval_diag_overlap(Snew);
2927 
2928     // Compute maximum difference of diagonal elements
2929     maxdiff=arma::max(arma::abs(Snew-Sold));
2930 
2931     // Copy contents
2932     std::swap(Snew,Sold);
2933 
2934     // Increment order if tolerance not achieved.
2935     if(maxdiff>otoler) {
2936       l_enough=info.l;
2937     }
2938   } while(maxdiff>otoler && info.l<=lmax);
2939 
2940   // This is the value we want
2941   info.l=l_enough;
2942 
2943   // Free memory once more
2944   free();
2945 
2946   return info;
2947 }
2948 
form_grid()2949 void AngularGrid::form_grid() {
2950   // Clear anything that already exists
2951   free();
2952 
2953   // Add grid points
2954   if(use_lobatto)
2955     lobatto_shell();
2956   else
2957     lebedev_shell();
2958 
2959   // Do Becke weights
2960   becke_weights();
2961   // Prune points with small weight
2962   prune_points();
2963   // Collect weights
2964   get_weights();
2965 
2966   // Update shell list
2967   update_shell_list();
2968   // Compute basis functions
2969   compute_bf();
2970 }
2971 
form_hirshfeld_grid(const Hirshfeld & hirsh)2972 void AngularGrid::form_hirshfeld_grid(const Hirshfeld & hirsh) {
2973   // Clear anything that already exists
2974   free();
2975 
2976   // Add grid points
2977   if(use_lobatto)
2978     lobatto_shell();
2979   else
2980     lebedev_shell();
2981 
2982   // Do Becke weights
2983   hirshfeld_weights(hirsh);
2984   // Prune points with small weight
2985   prune_points();
2986   // Collect weights
2987   get_weights();
2988 
2989   // Update shell list
2990   update_shell_list();
2991   // Compute basis functions
2992   compute_bf();
2993 }
2994 
update_shell_list()2995 void AngularGrid::update_shell_list() {
2996   // Form list of important basis functions. Shell ranges
2997   std::vector<double> shran=basp->get_shell_ranges();
2998   // Distances to other nuclei
2999   std::vector<double> nucdist=basp->get_nuclear_distances(info.atind);
3000 
3001   // Current radius
3002   double rad=info.R;
3003   // Shells that might contribute, and the amount of functions
3004   pot_shells.clear();
3005   size_t Nbf=0;
3006   for(size_t inuc=0;inuc<basp->get_Nnuc();inuc++) {
3007     // Closest distance of shell to nucleus. Covers both nucleus
3008     // inside shell, and nucleus outside the shell.
3009     double dist=fabs(nucdist[inuc]-rad);
3010     // Get indices of shells centered on nucleus
3011     std::vector<size_t> shellinds=basp->get_shell_inds(inuc);
3012 
3013     // Loop over shells on nucleus
3014     for(size_t ish=0;ish<shellinds.size();ish++) {
3015       // Shell is relevant if range is larger than minimal distance
3016       if(dist<=shran[shellinds[ish]]) {
3017 	// Add shell to list of shells to compute
3018 	pot_shells.push_back(shellinds[ish]);
3019 	// Increment amount of important functions
3020 	Nbf+=basp->get_Nbf(shellinds[ish]);
3021       }
3022     }
3023   }
3024 
3025   // Store indices of functions
3026   pot_bf_ind.zeros(Nbf);
3027   size_t ioff=0;
3028   for(size_t i=0;i<pot_shells.size();i++) {
3029     // Amount of functions on shell is
3030     size_t Nsh=basp->get_Nbf(pot_shells[i]);
3031     // Shell offset
3032     size_t sh0=basp->get_first_ind(pot_shells[i]);
3033     // Indices
3034     arma::uvec ls=(arma::linspace<arma::uvec>(sh0,sh0+Nsh-1,Nsh));
3035     pot_bf_ind.subvec(ioff,ioff+Nsh-1)=ls;
3036     ioff+=Nsh;
3037   }
3038 }
3039 
compute_bf()3040 void AngularGrid::compute_bf() {
3041   // Create list of shells that actually contribute. Shell ranges
3042   std::vector<double> shran=basp->get_shell_ranges();
3043 
3044   shells.clear();
3045   size_t Nbf=0;
3046   for(size_t is=0;is<pot_shells.size();is++) {
3047     // Shell center is
3048     coords_t cen(basp->get_shell_center(pot_shells[is]));
3049     // Shell range is
3050     double rangesq(std::pow(shran[pot_shells[is]],2));
3051 
3052     // Check if the function is important on at least one point in the
3053     // pruned grid
3054     for(size_t ip=0;ip<grid.size();ip++) {
3055       if(normsq(grid[ip].r-cen)<=rangesq) {
3056 	// Shell is important!
3057 	shells.push_back(pot_shells[is]);
3058 	Nbf+=basp->get_Nbf(pot_shells[is]);
3059 	break;
3060       }
3061     }
3062   }
3063 
3064   bf_i0.zeros(shells.size());
3065   bf_N.zeros(shells.size());
3066 
3067   // Store indices of functions
3068   bf_ind.zeros(Nbf);
3069   size_t ioff=0;
3070   for(size_t i=0;i<shells.size();i++) {
3071     // Amount of functions on shell is
3072     size_t Nsh=basp->get_Nbf(shells[i]);
3073     bf_N(i)=Nsh;
3074     // Shell offset
3075     size_t sh0=basp->get_first_ind(shells[i]);
3076     // Local offset
3077     bf_i0(i)=ioff;
3078     // Indices
3079     arma::uvec ls=(arma::linspace<arma::uvec>(sh0,sh0+Nsh-1,Nsh));
3080     bf_ind.subvec(ioff,ioff+Nsh-1)=ls;
3081     ioff+=Nsh;
3082   }
3083 
3084   // Store indices of functions on the potentials list
3085   bf_potind.zeros(Nbf);
3086   size_t j=0;
3087   ioff=0;
3088   size_t joff=0;
3089   for(size_t i=0;i<pot_shells.size() && j<shells.size();i++) {
3090     // Amount of functions on shell is
3091     size_t Nsh=basp->get_Nbf(pot_shells[i]);
3092     // Store indices?
3093     if(pot_shells[i]==shells[j]) {
3094       arma::uvec ls=(arma::linspace<arma::uvec>(joff,joff+Nsh-1,Nsh));
3095       bf_potind.subvec(ioff,ioff+Nsh-1)=ls;
3096       ioff+=Nsh;
3097       j++;
3098     }
3099     joff+=Nsh;
3100   }
3101 
3102   // Store number of function values
3103   info.nfunc=Nbf*grid.size();
3104 
3105   bf.zeros(bf_ind.n_elem,grid.size());
3106   // Loop over points
3107   for(size_t ip=0;ip<grid.size();ip++) {
3108     // Loop over shells. Offset
3109     ioff=0;
3110     for(size_t ish=0;ish<shells.size();ish++) {
3111       arma::vec fval=basp->eval_func(shells[ish],grid[ip].r.x,grid[ip].r.y,grid[ip].r.z);
3112       bf.submat(ioff,ip,ioff+fval.n_elem-1,ip)=fval;
3113       ioff+=fval.n_elem;
3114     }
3115   }
3116 
3117   if(do_grad) {
3118     bf_x.zeros(bf_ind.n_elem,grid.size());
3119     bf_y.zeros(bf_ind.n_elem,grid.size());
3120     bf_z.zeros(bf_ind.n_elem,grid.size());
3121     // Loop over points
3122     for(size_t ip=0;ip<grid.size();ip++) {
3123       // Loop over shells. Offset
3124       ioff=0;
3125       for(size_t ish=0;ish<shells.size();ish++) {
3126 	arma::mat gval=basp->eval_grad(shells[ish],grid[ip].r.x,grid[ip].r.y,grid[ip].r.z);
3127 	bf_x.submat(ioff,ip,ioff+gval.n_rows-1,ip)=gval.col(0);
3128 	bf_y.submat(ioff,ip,ioff+gval.n_rows-1,ip)=gval.col(1);
3129 	bf_z.submat(ioff,ip,ioff+gval.n_rows-1,ip)=gval.col(2);
3130 	ioff+=gval.n_rows;
3131       }
3132     }
3133   }
3134 
3135   if(do_lapl) {
3136     bf_lapl.zeros(bf_ind.n_elem,grid.size());
3137     // Loop over points
3138     for(size_t ip=0;ip<grid.size();ip++) {
3139       // Loop over shells. Offset
3140       ioff=0;
3141       for(size_t ish=0;ish<shells.size();ish++) {
3142 	arma::vec lval=basp->eval_lapl(shells[ish],grid[ip].r.x,grid[ip].r.y,grid[ip].r.z);
3143 	bf_lapl.submat(ioff,ip,ioff+lval.n_elem-1,ip)=lval;
3144 	ioff+=lval.n_elem;
3145       }
3146     }
3147   }
3148 
3149   if(do_hess) {
3150     bf_hess.zeros(9*bf_ind.n_elem,grid.size());
3151     // Loop over points
3152     for(size_t ip=0;ip<grid.size();ip++) {
3153       // Loop over shells. Offset
3154       ioff=0;
3155       for(size_t ish=0;ish<shells.size();ish++) {
3156 	// eval_hess returns Nbf x 9 matrix, transpose to 9 x Nbf
3157 	arma::mat hval=arma::trans(basp->eval_hess(shells[ish],grid[ip].r.x,grid[ip].r.y,grid[ip].r.z));
3158 	// Store values
3159 	for(int c=0;c<9;c++)
3160 	  for(size_t f=0;f<hval.n_cols;f++) {
3161 	    bf_hess(ioff + 9*f + c,ip)=hval(c,f);
3162 	  }
3163 	ioff+=hval.n_elem;
3164       }
3165     }
3166   }
3167 
3168   if(do_lgrad) {
3169     bf_lx.zeros(bf_ind.n_elem,grid.size());
3170     bf_ly.zeros(bf_ind.n_elem,grid.size());
3171     bf_lz.zeros(bf_ind.n_elem,grid.size());
3172     // Loop over points
3173     for(size_t ip=0;ip<grid.size();ip++) {
3174       // Loop over shells. Offset
3175       ioff=0;
3176       for(size_t ish=0;ish<shells.size();ish++) {
3177 	arma::mat lgval=basp->eval_laplgrad(shells[ish],grid[ip].r.x,grid[ip].r.y,grid[ip].r.z);
3178 	bf_lx.submat(ioff,ip,ioff+lgval.n_rows-1,ip)=lgval.col(0);
3179 	bf_ly.submat(ioff,ip,ioff+lgval.n_rows-1,ip)=lgval.col(1);
3180 	bf_lz.submat(ioff,ip,ioff+lgval.n_rows-1,ip)=lgval.col(2);
3181 	ioff+=lgval.n_rows;
3182       }
3183     }
3184   }
3185 }
3186 
eval_SAP(const SAP & sap,arma::mat & Vo) const3187 void AngularGrid::eval_SAP(const SAP & sap, arma::mat & Vo) const {
3188   // List of nuclei
3189   std::vector<nucleus_t> nuclei(basp->get_nuclei());
3190 
3191   // Form the potential at every grid point
3192   arma::rowvec vsap(grid.size());
3193   vsap.zeros();
3194   for(size_t inuc=0;inuc<nuclei.size();inuc++) {
3195     // No potential from ghost atoms
3196     if(nuclei[inuc].bsse)
3197       continue;
3198     for(size_t ip=0;ip<grid.size();ip++) {
3199       // Distance from grid point is
3200       double r=norm(nuclei[inuc].r-grid[ip].r);
3201       // Increment potential
3202       vsap(ip)+=sap.get(nuclei[inuc].Z,r);
3203     }
3204   }
3205   vsap%=w;
3206 
3207   // Calculate in subspace
3208   arma::mat V(bf_ind.n_elem,bf_ind.n_elem);
3209   V.zeros();
3210   increment_lda<double>(V,vsap,bf);
3211   // Increment
3212   Vo.submat(bf_ind,bf_ind)+=V;
3213 }
3214 
DFTGrid()3215 DFTGrid::DFTGrid() {
3216 }
3217 
DFTGrid(const BasisSet * bas,bool ver,bool lobatto)3218 DFTGrid::DFTGrid(const BasisSet * bas, bool ver, bool lobatto) {
3219   basp=bas;
3220   verbose=ver;
3221 
3222   // Allocate atomic grids
3223   grids.resize(bas->get_Nnuc());
3224 
3225   // Allocate work grids
3226 #ifdef _OPENMP
3227   int nth=omp_get_max_threads();
3228   for(int i=0;i<nth;i++)
3229     wrk.push_back(AngularGrid(lobatto));
3230 #else
3231   wrk.push_back(AngularGrid(lobatto));
3232 #endif
3233 
3234   // Set basis set
3235   for(size_t i=0;i<wrk.size();i++)
3236     wrk[i].set_basis(*bas);
3237 }
3238 
~DFTGrid()3239 DFTGrid::~DFTGrid() {
3240 }
3241 
set_verbose(bool ver)3242 void DFTGrid::set_verbose(bool ver) {
3243   verbose=ver;
3244 }
3245 
prune_shells()3246 void DFTGrid::prune_shells() {
3247   for(size_t i=grids.size()-1;i<grids.size();i--)
3248     if(!grids[i].np || !grids[i].nfunc)
3249       grids.erase(grids.begin()+i);
3250 }
3251 
construct(int nrad,int lmax,int x_func,int c_func,bool strict)3252 void DFTGrid::construct(int nrad, int lmax, int x_func, int c_func, bool strict) {
3253   // Check necessity of gradients and laplacians
3254   bool grad, tau, lapl;
3255   wrk[0].check_grad_tau_lapl(x_func,c_func);
3256   wrk[0].get_grad_tau_lapl(grad,tau,lapl);
3257   construct(nrad,lmax,grad,tau,lapl,strict,false);
3258 }
3259 
construct(int nrad,int lmax,bool grad,bool tau,bool lapl,bool strict,bool nl)3260 void DFTGrid::construct(int nrad, int lmax, bool grad, bool tau, bool lapl, bool strict, bool nl) {
3261   if(verbose) {
3262     if(nl)
3263       printf("Constructing static nrad=%i lmax=%i NL grid.\n",nrad,lmax);
3264     else
3265       printf("Constructing static nrad=%i lmax=%i XC grid.\n",nrad,lmax);
3266     fflush(stdout);
3267   }
3268 
3269   // Set necessity of gradienst and laplacian and grid
3270   for(size_t i=0;i<wrk.size();i++) {
3271     wrk[i].set_grad_tau_lapl(grad,tau,lapl);
3272   }
3273 
3274   // Set grid point screening tolerances
3275   double tol=strict ? 0.0 : DBL_EPSILON;
3276 
3277   // Get Chebyshev nodes and weights for radial part
3278   std::vector<double> rad, wrad;
3279   radial_chebyshev_jac(nrad,rad,wrad);
3280   nrad=rad.size(); // Sanity check
3281 
3282   // Construct grids
3283   size_t Nat=basp->get_Nnuc();
3284   grids.clear();
3285   // Loop over atoms
3286   for(size_t iat=0;iat<Nat;iat++) {
3287     angshell_t sh;
3288     sh.atind=iat;
3289     sh.cen=basp->get_nuclear_coords(iat);
3290     sh.tol=tol;
3291     sh.np=0;
3292     sh.nfunc=0;
3293 
3294     // Loop over radii
3295     for(int irad=0;irad<nrad;irad++) {
3296       sh.R=rad[irad];
3297       sh.w=wrad[irad];
3298       sh.l=lmax;
3299       grids.push_back(sh);
3300     }
3301   }
3302 
3303 #ifdef _OPENMP
3304 #pragma omp parallel
3305 #endif
3306   {
3307 
3308 #ifndef _OPENMP
3309     const int ith=0;
3310 #else
3311     const int ith=omp_get_thread_num();
3312 #pragma omp for schedule(dynamic,1)
3313 #endif
3314     for(size_t i=0;i<grids.size();i++) {
3315       wrk[ith].set_grid(grids[i]);
3316       grids[i]=wrk[ith].construct();
3317     }
3318   }
3319 
3320   // Prune empty shells
3321   prune_shells();
3322 
3323   if(verbose) {
3324     std::string met=nl ? "NL" : "XC";
3325     print_grid(met);
3326   }
3327 }
3328 
3329 
construct(const arma::mat & P,double ftoler,int x_func,int c_func)3330 void DFTGrid::construct(const arma::mat & P, double ftoler, int x_func, int c_func) {
3331   // Add all atoms
3332   if(verbose) {
3333     printf("Constructing adaptive XC grid with tolerance %e.\n",ftoler);
3334     koster_grid_info(ftoler);
3335     fflush(stdout);
3336   }
3337 
3338   Timer t;
3339 
3340   // Check necessity of gradients and laplacians
3341   for(size_t i=0;i<wrk.size();i++)
3342     wrk[i].check_grad_tau_lapl(x_func,c_func);
3343 
3344   // Amount of radial shells on the atoms
3345   std::vector<size_t> nrad(basp->get_Nnuc());
3346 
3347   // Form radial shells
3348   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3349     angshell_t sh;
3350     sh.atind=iat;
3351     sh.cen=basp->get_nuclear_coords(iat);
3352     sh.tol=ftoler*PRUNETHR;
3353 
3354     // Compute necessary number of radial points for atom
3355     size_t nr=koster_nrad(ftoler,basp->get_Z(iat));
3356 
3357     // Get Chebyshev nodes and weights for radial part
3358     std::vector<double> rad, wrad;
3359     radial_chebyshev_jac(nr,rad,wrad);
3360     nr=rad.size(); // Sanity check
3361     nrad[iat]=nr;
3362 
3363     // Loop over radii
3364     for(size_t irad=0;irad<nr;irad++) {
3365       sh.R=rad[irad];
3366       sh.w=wrad[irad];
3367       grids.push_back(sh);
3368     }
3369   }
3370 
3371 #ifdef _OPENMP
3372 #pragma omp parallel
3373 #endif
3374   {
3375 #ifndef _OPENMP
3376     int ith=0;
3377 #else
3378     int ith=omp_get_thread_num();
3379 #pragma omp for schedule(dynamic,1)
3380 #endif
3381     for(size_t i=0;i<grids.size();i++) {
3382       wrk[ith].set_grid(grids[i]);
3383       grids[i]=wrk[ith].construct(P,ftoler/nrad[grids[i].atind],x_func,c_func);
3384     }
3385   }
3386 
3387   // Prune empty shells
3388   prune_shells();
3389 
3390   if(verbose) {
3391     printf("DFT XC grid constructed in %s.\n",t.elapsed().c_str());
3392     print_grid();
3393     fflush(stdout);
3394   }
3395 
3396 }
3397 
construct(const arma::mat & Pa,const arma::mat & Pb,double ftoler,int x_func,int c_func)3398 void DFTGrid::construct(const arma::mat & Pa, const arma::mat & Pb, double ftoler, int x_func, int c_func) {
3399   // Add all atoms
3400   if(verbose) {
3401     printf("Constructing adaptive XC grid with tolerance %e.\n",ftoler);
3402     koster_grid_info(ftoler);
3403     fflush(stdout);
3404   }
3405 
3406   Timer t;
3407 
3408   // Check necessity of gradients and laplacians
3409   for(size_t i=0;i<wrk.size();i++)
3410     wrk[i].check_grad_tau_lapl(x_func,c_func);
3411 
3412   // Amount of radial shells on the atoms
3413   std::vector<size_t> nrad(basp->get_Nnuc());
3414 
3415   // Form radial shells
3416   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3417     angshell_t sh;
3418     sh.atind=iat;
3419     sh.cen=basp->get_nuclear_coords(iat);
3420     sh.tol=ftoler*PRUNETHR;
3421 
3422     // Compute necessary number of radial points for atom
3423     size_t nr=koster_nrad(ftoler,basp->get_Z(iat));
3424     // Get Chebyshev nodes and weights for radial part
3425     std::vector<double> rad, wrad;
3426     radial_chebyshev_jac(nr,rad,wrad);
3427     nr=rad.size(); // Sanity check
3428     nrad[iat]=nr;
3429 
3430     // Loop over radii
3431     for(size_t irad=0;irad<nr;irad++) {
3432       sh.R=rad[irad];
3433       sh.w=wrad[irad];
3434       grids.push_back(sh);
3435     }
3436   }
3437 
3438 #ifdef _OPENMP
3439 #pragma omp parallel
3440 #endif
3441   {
3442 #ifndef _OPENMP
3443     int ith=0;
3444 #else
3445     int ith=omp_get_thread_num();
3446 #pragma omp for schedule(dynamic,1)
3447 #endif
3448     for(size_t i=0;i<grids.size();i++) {
3449       wrk[ith].set_grid(grids[i]);
3450       grids[i]=wrk[ith].construct(Pa,Pb,ftoler/nrad[grids[i].atind],x_func,c_func);
3451     }
3452   }
3453 
3454   // Prune empty shells
3455   prune_shells();
3456 
3457   if(verbose) {
3458     printf("DFT XC grid constructed in %s.\n",t.elapsed().c_str());
3459     print_grid();
3460     fflush(stdout);
3461   }
3462 }
3463 
construct(const arma::cx_mat & Ctilde,double ftoler,int x_func,int c_func)3464 void DFTGrid::construct(const arma::cx_mat & Ctilde, double ftoler, int x_func, int c_func) {
3465   // Add all atoms
3466   if(verbose) {
3467     printf("Constructing adaptive XC grid with tolerance %e.\n",ftoler);
3468     koster_grid_info(ftoler);
3469     fflush(stdout);
3470   }
3471 
3472   Timer t;
3473 
3474   // Check necessity of gradients and laplacians
3475   for(size_t i=0;i<wrk.size();i++)
3476     wrk[i].check_grad_tau_lapl(x_func,c_func);
3477 
3478   // Amount of radial shells on the atoms
3479   std::vector<size_t> nrad(basp->get_Nnuc());
3480 
3481   // Form radial shells
3482   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3483     angshell_t sh;
3484     sh.atind=iat;
3485     sh.cen=basp->get_nuclear_coords(iat);
3486     sh.tol=ftoler*PRUNETHR;
3487 
3488     // Compute necessary number of radial points for atom
3489     size_t nr=koster_nrad(ftoler,basp->get_Z(iat));
3490     // Get Chebyshev nodes and weights for radial part
3491     std::vector<double> rad, wrad;
3492     radial_chebyshev_jac(nr,rad,wrad);
3493     nr=rad.size(); // Sanity check
3494     nrad[iat]=nr;
3495 
3496     // Loop over radii
3497     for(size_t irad=0;irad<nr;irad++) {
3498       sh.R=rad[irad];
3499       sh.w=wrad[irad];
3500       grids.push_back(sh);
3501     }
3502   }
3503 
3504   // Orbital grids
3505   std::vector< std::vector<angshell_t> > orbgrid(grids.size(),grids);
3506   for(size_t i=0;i<orbgrid.size();i++)
3507     orbgrid[i].resize(Ctilde.n_cols);
3508 
3509   // Intel compiler complains about collapse...
3510   const size_t Ngrid(grids.size());
3511   const size_t Norb(Ctilde.n_cols);
3512 
3513 #ifdef _OPENMP
3514 #pragma omp parallel
3515 #endif
3516   {
3517 #ifndef _OPENMP
3518     int ith=0;
3519 #else
3520     int ith=omp_get_thread_num();
3521 
3522     // Collapse statement introduced in OpenMP 3.0 in May 2008
3523 #if _OPENMP >= 200805
3524 #pragma omp for schedule(dynamic,1) collapse(2)
3525 #else
3526 #pragma omp for schedule(dynamic,1)
3527 #endif
3528 #endif
3529     for(size_t ig=0;ig<Ngrid;ig++)
3530       for(size_t iorb=0;iorb<Norb;iorb++)
3531 	{
3532 	  wrk[ith].set_grid(grids[ig]);
3533 	  orbgrid[ig][iorb]=wrk[ith].construct(Ctilde.col(iorb),ftoler/nrad[grids[ig].atind],x_func,c_func);
3534 	}
3535   }
3536 
3537   // Update l values
3538   for(size_t ig=0;ig<grids.size();ig++) {
3539     grids[ig].l=orbgrid[ig][0].l;
3540     for(size_t io=1;io<orbgrid[ig].size();io++)
3541       grids[ig].l=std::max(orbgrid[ig][io].l,grids[ig].l);
3542   }
3543 
3544   // Rerun construction to update function value sizes
3545 #ifdef _OPENMP
3546 #pragma omp parallel
3547 #endif
3548   {
3549 
3550 #ifndef _OPENMP
3551     const int ith=0;
3552 #else
3553     const int ith=omp_get_thread_num();
3554 #pragma omp for schedule(dynamic,1)
3555 #endif
3556     for(size_t i=0;i<grids.size();i++) {
3557       wrk[ith].set_grid(grids[i]);
3558       grids[i]=wrk[ith].construct();
3559     }
3560   }
3561 
3562   // Prune empty shells
3563   prune_shells();
3564 
3565   if(verbose) {
3566     printf("DFT XC grid constructed in %s.\n",t.elapsed().c_str());
3567     print_grid();
3568     fflush(stdout);
3569   }
3570 }
3571 
krack_grid_info(double otoler) const3572 void DFTGrid::krack_grid_info(double otoler) const {
3573   printf("Maximal composition of Krack adaptive grid\n");
3574   printf("%3s %3s %4s %4s\n","idx","sym","nrad","lmax");
3575   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3576     int nr=krack_nrad(otoler,basp->get_Z(iat));
3577     int nl=krack_lmax(otoler);
3578     printf("%3i %-3s %4i %4i\n",(int) iat+1,basp->get_symbol(iat).c_str(),nr,nl);
3579   }
3580 }
3581 
koster_grid_info(double otoler) const3582 void DFTGrid::koster_grid_info(double otoler) const {
3583   printf("Maximal composition of Koster adaptive grid\n");
3584   printf("%3s %3s %4s %4s\n","idx","sym","nrad","lmax");
3585   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3586     int nr=koster_nrad(otoler,basp->get_Z(iat));
3587     int nl=koster_lmax(otoler);
3588     printf("%3i %-3s %4i %4i\n",(int) iat+1,basp->get_symbol(iat).c_str(),nr,nl);
3589   }
3590 }
3591 
construct_becke(double otoler)3592 void DFTGrid::construct_becke(double otoler) {
3593   if(verbose) {
3594     printf("Constructing adaptive Becke grid with tolerance %e.\n",otoler);
3595     krack_grid_info(otoler);
3596     fflush(stdout);
3597   }
3598 
3599   // Only need function values
3600   for(size_t i=0;i<wrk.size();i++)
3601     wrk[i].set_grad_tau_lapl(false,false,false);
3602 
3603   // Amount of radial shells on the atoms
3604   std::vector<size_t> nrad(basp->get_Nnuc());
3605 
3606   Timer t;
3607 
3608   // Form radial shells
3609   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3610     angshell_t sh;
3611     sh.atind=iat;
3612     sh.cen=basp->get_nuclear_coords(iat);
3613     sh.tol=otoler*PRUNETHR;
3614 
3615     // Compute necessary number of radial points for atom
3616     size_t nr=krack_nrad(otoler,basp->get_Z(iat));
3617 
3618     // Get Chebyshev nodes and weights for radial part
3619     std::vector<double> rad, wrad;
3620     radial_chebyshev_jac(nr,rad,wrad);
3621     nr=rad.size(); // Sanity check
3622     nrad[iat]=nr;
3623 
3624     // Loop over radii
3625     for(size_t irad=0;irad<nr;irad++) {
3626       sh.R=rad[irad];
3627       sh.w=wrad[irad];
3628       grids.push_back(sh);
3629     }
3630   }
3631 
3632 #ifdef _OPENMP
3633 #pragma omp parallel
3634 #endif
3635   {
3636 #ifndef _OPENMP
3637     int ith=0;
3638 #else
3639     int ith=omp_get_thread_num();
3640 #pragma omp for schedule(dynamic,1)
3641 #endif
3642     for(size_t i=0;i<grids.size();i++) {
3643       wrk[ith].set_grid(grids[i]);
3644       grids[i]=wrk[ith].construct_becke(otoler/nrad[grids[i].atind]);
3645     }
3646   }
3647 
3648   // Prune empty shells
3649   prune_shells();
3650 
3651   if(verbose) {
3652     printf("Becke grid constructed in %s.\n",t.elapsed().c_str());
3653     print_grid("Becke");
3654     fflush(stdout);
3655   }
3656 }
3657 
construct_hirshfeld(const Hirshfeld & hirsh,double otoler)3658 void DFTGrid::construct_hirshfeld(const Hirshfeld & hirsh, double otoler) {
3659   if(verbose) {
3660     printf("Constructing adaptive Hirshfeld grid with tolerance %e.\n",otoler);
3661     krack_grid_info(otoler);
3662     fflush(stdout);
3663   }
3664 
3665   // Only need function values
3666   for(size_t i=0;i<wrk.size();i++)
3667     wrk[i].set_grad_tau_lapl(false,false,false);
3668 
3669   // Amount of radial shells on the atoms
3670   std::vector<size_t> nrad(basp->get_Nnuc());
3671 
3672   Timer t;
3673 
3674   // Form radial shells
3675   for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
3676     angshell_t sh;
3677     sh.atind=iat;
3678     sh.cen=basp->get_nuclear_coords(iat);
3679     sh.tol=otoler*PRUNETHR;
3680 
3681     // Compute necessary number of radial points for atom
3682     size_t nr=krack_nrad(otoler,basp->get_Z(iat));
3683 
3684     // Get Chebyshev nodes and weights for radial part
3685     std::vector<double> rad, wrad;
3686     radial_chebyshev_jac(nr,rad,wrad);
3687     nr=rad.size(); // Sanity check
3688     nrad[iat]=nr;
3689 
3690     // Loop over radii
3691     for(size_t irad=0;irad<nr;irad++) {
3692       sh.R=rad[irad];
3693       sh.w=wrad[irad];
3694       grids.push_back(sh);
3695     }
3696   }
3697 
3698 #ifdef _OPENMP
3699 #pragma omp parallel
3700 #endif
3701   {
3702 #ifndef _OPENMP
3703     int ith=0;
3704 #else
3705     int ith=omp_get_thread_num();
3706 #pragma omp for schedule(dynamic,1)
3707 #endif
3708     for(size_t i=0;i<grids.size();i++) {
3709       wrk[ith].set_grid(grids[i]);
3710       grids[i]=wrk[ith].construct_hirshfeld(hirsh,otoler/nrad[grids[i].atind]);
3711     }
3712   }
3713 
3714   // Prune empty shells
3715   prune_shells();
3716 
3717   if(verbose) {
3718     printf("Hirshfeld grid constructed in %s.\n",t.elapsed().c_str());
3719     print_grid("Hirshfeld");
3720     fflush(stdout);
3721   }
3722 }
3723 
get_Npoints() const3724 size_t DFTGrid::get_Npoints() const {
3725   size_t np=0;
3726   for(size_t i=0;i<grids.size();i++)
3727     np+=grids[i].np;
3728   return np;
3729 }
3730 
get_Nfuncs() const3731 size_t DFTGrid::get_Nfuncs() const {
3732   size_t nf=0;
3733   for(size_t i=0;i<grids.size();i++)
3734     nf+=grids[i].nfunc;
3735   return nf;
3736 }
3737 
eval_overlap()3738 arma::mat DFTGrid::eval_overlap() {
3739   // Amount of basis functions
3740   size_t N=basp->get_Nbf();
3741 
3742   // Returned matrix
3743   arma::mat S(N,N);
3744   S.zeros();
3745 
3746 #ifdef _OPENMP
3747 #pragma omp parallel
3748 #endif
3749   {
3750 #ifndef _OPENMP
3751     int ith=0;
3752 #else
3753     int ith=omp_get_thread_num();
3754 #endif
3755 
3756     // Work array
3757     arma::mat Swrk(S);
3758     Swrk.zeros();
3759 
3760 #ifdef _OPENMP
3761 #pragma omp for schedule(dynamic,1)
3762 #endif
3763     for(size_t i=0;i<grids.size();i++) {
3764       // Change atom and create grid
3765       wrk[ith].set_grid(grids[i]);
3766       wrk[ith].form_grid();
3767       // Evaluate overlap
3768       wrk[ith].eval_overlap(Swrk);
3769 
3770       // Free memory
3771       wrk[ith].free();
3772     }
3773 
3774 #ifdef _OPENMP
3775 #pragma omp critical
3776 #endif
3777     S+=Swrk;
3778   }
3779 
3780   return S;
3781 }
3782 
eval_overlap(size_t inuc)3783 arma::mat DFTGrid::eval_overlap(size_t inuc) {
3784   // Amount of basis functions
3785   size_t N=basp->get_Nbf();
3786 
3787   // Returned matrix
3788   arma::mat Sat(N,N);
3789   Sat.zeros();
3790 
3791 #ifdef _OPENMP
3792 #pragma omp parallel
3793 #endif
3794   {
3795 #ifndef _OPENMP
3796     int ith=0;
3797 #else
3798     int ith=omp_get_thread_num();
3799 #endif
3800 
3801     // Work array
3802     arma::mat Swrk(Sat);
3803     Swrk.zeros();
3804 
3805 #ifdef _OPENMP
3806 #pragma omp for schedule(dynamic,1)
3807 #endif
3808     for(size_t i=0;i<grids.size();i++) {
3809       if(grids[i].atind!=inuc)
3810 	continue;
3811 
3812       // Change atom and create grid
3813       wrk[ith].set_grid(grids[i]);
3814       wrk[ith].form_grid();
3815       // Evaluate overlap
3816       wrk[ith].eval_overlap(Swrk);
3817 
3818       // Free memory
3819       wrk[ith].free();
3820     }
3821 
3822 #ifdef _OPENMP
3823 #pragma omp critical
3824 #endif
3825     Sat+=Swrk;
3826   }
3827 
3828   return Sat;
3829 }
3830 
eval_overlaps()3831 std::vector<arma::mat> DFTGrid::eval_overlaps() {
3832   // Amount of basis functions
3833   size_t N=basp->get_Nbf();
3834 
3835   // Returned matrices
3836   std::vector<arma::mat> Sat(basp->get_Nnuc());
3837   for(size_t inuc=0;inuc<basp->get_Nnuc();inuc++)
3838     Sat[inuc].zeros(N,N);
3839 
3840 #ifdef _OPENMP
3841 #pragma omp parallel
3842 #endif
3843   {
3844 
3845 #ifdef _OPENMP
3846     int ith=omp_get_thread_num();
3847 #else
3848     int ith=0;
3849 #endif
3850 
3851     // Temporary matrix
3852     arma::mat Swrk(N,N);
3853     Swrk.zeros();
3854 
3855     // Add atomic contributions
3856 #ifdef _OPENMP
3857 #pragma omp for schedule(dynamic,1)
3858 #endif
3859     for(size_t i=0;i<grids.size();i++) {
3860       // Change atom and create grid
3861       wrk[ith].set_grid(grids[i]);
3862       wrk[ith].form_grid();
3863 
3864       // Evaluate overlap
3865       wrk[ith].eval_overlap(Swrk);
3866 #ifdef _OPENMP
3867 #pragma omp critical
3868 #endif
3869       Sat[grids[i].atind]+=Swrk;
3870 
3871       // Free memory
3872       wrk[ith].free();
3873     }
3874   }
3875 
3876   return Sat;
3877 }
3878 
eval_overlap(const arma::cx_mat & Cocc,size_t io,double k,double thr)3879 arma::mat DFTGrid::eval_overlap(const arma::cx_mat & Cocc, size_t io, double k, double thr) {
3880   // Amount of basis functions
3881   size_t N=basp->get_Nbf();
3882 
3883   // Returned matrices
3884   arma::mat S(N,N);
3885   S.zeros();
3886 
3887 #ifdef _OPENMP
3888 #pragma omp parallel
3889 #endif
3890   {
3891 #ifdef _OPENMP
3892     int ith=omp_get_thread_num();
3893 
3894     // Temporary matrix
3895     arma::mat Swrk(N,N);
3896     Swrk.zeros();
3897 #else
3898     int ith=0;
3899 #endif
3900 
3901 #ifdef _OPENMP
3902 #pragma omp for schedule(dynamic,1)
3903 #endif
3904     for(size_t i=0;i<grids.size();i++) {
3905       // Change atom and create grid
3906       wrk[ith].set_grid(grids[i]);
3907       wrk[ith].form_grid();
3908       // Evaluate overlap
3909 #ifdef _OPENMP
3910       wrk[ith].eval_overlap(Cocc,io,k,Swrk,thr);
3911 #else
3912       wrk[ith].eval_overlap(Cocc,io,k,S,thr);
3913 #endif
3914       // Free memory
3915       wrk[ith].free();
3916     }
3917 
3918 #ifdef _OPENMP
3919 #pragma omp critical
3920     S+=Swrk;
3921 #endif
3922   }
3923 
3924   return S;
3925 }
3926 
eval_overlap(const arma::cx_mat & Cocc,const arma::vec & Esi,double k,double thr)3927 arma::mat DFTGrid::eval_overlap(const arma::cx_mat & Cocc, const arma::vec & Esi, double k, double thr) {
3928   // Amount of basis functions
3929   size_t N=basp->get_Nbf();
3930 
3931   // Returned matrices
3932   arma::mat S(N,N);
3933   S.zeros();
3934 
3935 #ifdef _OPENMP
3936 #pragma omp parallel
3937 #endif
3938   {
3939 #ifdef _OPENMP
3940     int ith=omp_get_thread_num();
3941 
3942     // Temporary matrix
3943     arma::mat Swrk(N,N);
3944     Swrk.zeros();
3945 #else
3946     int ith=0;
3947 #endif
3948 
3949 #ifdef _OPENMP
3950 #pragma omp for schedule(dynamic,1)
3951 #endif
3952     for(size_t i=0;i<grids.size();i++) {
3953       // Change atom and create grid
3954       wrk[ith].set_grid(grids[i]);
3955       wrk[ith].form_grid();
3956       // Evaluate overlap
3957 #ifdef _OPENMP
3958       wrk[ith].eval_overlap(Cocc,Esi,k,Swrk,thr);
3959 #else
3960       wrk[ith].eval_overlap(Cocc,Esi,k,S,thr);
3961 #endif
3962       // Free memory
3963       wrk[ith].free();
3964     }
3965 
3966 #ifdef _OPENMP
3967 #pragma omp critical
3968     S+=Swrk;
3969 #endif
3970   }
3971 
3972   return S;
3973 }
3974 
eval_tau_overlap(const arma::cx_mat & Cocc,double k,double thr)3975 arma::mat DFTGrid::eval_tau_overlap(const arma::cx_mat & Cocc, double k, double thr) {
3976   // Amount of basis functions
3977   size_t N=basp->get_Nbf();
3978 
3979   // Returned matrices
3980   arma::mat S(N,N);
3981   S.zeros();
3982 
3983 #ifdef _OPENMP
3984 #pragma omp parallel
3985 #endif
3986   {
3987 #ifdef _OPENMP
3988     int ith=omp_get_thread_num();
3989 
3990     // Temporary matrix
3991     arma::mat Swrk(N,N);
3992     Swrk.zeros();
3993 #else
3994     int ith=0;
3995 #endif
3996 
3997 #ifdef _OPENMP
3998 #pragma omp for schedule(dynamic,1)
3999 #endif
4000     for(size_t i=0;i<grids.size();i++) {
4001       // Change atom and create grid
4002       wrk[ith].set_grid(grids[i]);
4003       wrk[ith].set_grad_tau_lapl(true,false,false);
4004       wrk[ith].form_grid();
4005       // Evaluate overlap
4006 #ifdef _OPENMP
4007       wrk[ith].eval_tau_overlap(Cocc,k,Swrk,thr);
4008 #else
4009       wrk[ith].eval_tau_overlap(Cocc,k,S,thr);
4010 #endif
4011       // Free memory
4012       wrk[ith].free();
4013     }
4014 
4015 #ifdef _OPENMP
4016 #pragma omp critical
4017     S+=Swrk;
4018 #endif
4019   }
4020 
4021   return S;
4022 }
4023 
eval_tau_overlap_deriv(const arma::cx_mat & Cocc,const arma::vec & Esi,double k,double thr)4024 arma::mat DFTGrid::eval_tau_overlap_deriv(const arma::cx_mat & Cocc, const arma::vec & Esi, double k, double thr) {
4025   // Amount of basis functions
4026   size_t N=basp->get_Nbf();
4027 
4028   // Returned matrices
4029   arma::mat S(N,N);
4030   S.zeros();
4031 
4032 #ifdef _OPENMP
4033 #pragma omp parallel
4034 #endif
4035   {
4036 #ifdef _OPENMP
4037     int ith=omp_get_thread_num();
4038 
4039     // Temporary matrix
4040     arma::mat Swrk(N,N);
4041     Swrk.zeros();
4042 #else
4043     int ith=0;
4044 #endif
4045 
4046 #ifdef _OPENMP
4047 #pragma omp for schedule(dynamic,1)
4048 #endif
4049     for(size_t i=0;i<grids.size();i++) {
4050       // Change atom and create grid
4051       wrk[ith].set_grid(grids[i]);
4052       wrk[ith].set_grad_tau_lapl(true,false,false);
4053       wrk[ith].form_grid();
4054       // Evaluate overlap
4055 #ifdef _OPENMP
4056       wrk[ith].eval_tau_overlap_deriv(Cocc,Esi,k,Swrk,thr);
4057 #else
4058       wrk[ith].eval_tau_overlap_deriv(Cocc,Esi,k,S,thr);
4059 #endif
4060       // Free memory
4061       wrk[ith].free();
4062     }
4063 
4064 #ifdef _OPENMP
4065 #pragma omp critical
4066     S+=Swrk;
4067 #endif
4068   }
4069 
4070   return S;
4071 }
4072 
eval_hirshfeld_overlap(const Hirshfeld & hirsh,size_t inuc)4073 arma::mat DFTGrid::eval_hirshfeld_overlap(const Hirshfeld & hirsh, size_t inuc) {
4074   // Amount of basis functions
4075   size_t N=basp->get_Nbf();
4076 
4077   // Returned matrices
4078   arma::mat Sat(N,N);
4079   Sat.zeros();
4080 
4081 #ifdef _OPENMP
4082   int ith=omp_get_thread_num();
4083 #else
4084   int ith=0;
4085 #endif
4086 
4087   // Change atom and create grid
4088   for(size_t i=0;i<grids.size();i++) {
4089     if(grids[i].atind!=inuc)
4090       continue;
4091 
4092     wrk[ith].set_grid(grids[i]);
4093     wrk[ith].form_hirshfeld_grid(hirsh);
4094     // Evaluate overlap
4095     wrk[ith].eval_overlap(Sat);
4096     // Free memory
4097     wrk[ith].free();
4098   }
4099 
4100   return Sat;
4101 }
4102 
eval_hirshfeld_overlaps(const Hirshfeld & hirsh)4103 std::vector<arma::mat> DFTGrid::eval_hirshfeld_overlaps(const Hirshfeld & hirsh) {
4104   // Amount of basis functions
4105   size_t N=basp->get_Nbf();
4106 
4107   // Returned matrices
4108   std::vector<arma::mat> Sat(basp->get_Nnuc());
4109   for(size_t inuc=0;inuc<basp->get_Nnuc();inuc++)
4110     Sat[inuc].zeros(N,N);
4111 
4112 #ifdef _OPENMP
4113 #pragma omp parallel
4114 #endif
4115   {
4116 #ifdef _OPENMP
4117     int ith=omp_get_thread_num();
4118 #else
4119     int ith=0;
4120 #endif
4121 
4122     // Work array
4123     arma::mat Swrk(Sat[0]);
4124     Swrk.zeros();
4125 
4126     // Add atomic contributions
4127 #ifdef _OPENMP
4128 #pragma omp for schedule(dynamic,1)
4129 #endif
4130     for(size_t i=0;i<grids.size();i++) {
4131       // Change atom and create grid
4132       wrk[ith].set_grid(grids[i]);
4133       wrk[ith].form_hirshfeld_grid(hirsh);
4134       // Evaluate overlap
4135       wrk[ith].eval_overlap(Swrk);
4136       // Free memory
4137       wrk[ith].free();
4138 
4139       // Increment total matrix
4140 #ifdef _OPENMP
4141 #pragma omp critical
4142 #endif
4143       Sat[grids[i].atind]+=Swrk;
4144     }
4145   }
4146 
4147   return Sat;
4148 }
4149 
eval_dens_list(const arma::mat & P)4150 std::vector<dens_list_t> DFTGrid::eval_dens_list(const arma::mat & P) {
4151   std::vector<dens_list_t> list;
4152 
4153 #ifdef _OPENMP
4154 #pragma omp parallel
4155 #endif
4156   {
4157 
4158 #ifdef _OPENMP
4159     int ith=omp_get_thread_num();
4160 #else
4161     int ith=0;
4162 #endif
4163 
4164     // Temporary list
4165     std::vector<dens_list_t> hlp;
4166 
4167 #ifdef _OPENMP
4168 #pragma omp for schedule(dynamic,1)
4169 #endif
4170     // Loop over integral grid
4171     for(size_t i=0;i<grids.size();i++) {
4172       // Change atom and create grid
4173       wrk[ith].set_grid(grids[i]);
4174       wrk[ith].form_grid();
4175       // Update density
4176       wrk[ith].update_density(P);
4177 
4178       // Compute helper
4179       hlp.clear();
4180       wrk[ith].get_density(hlp);
4181 
4182 #ifdef _OPENMP
4183 #pragma omp critical
4184 #endif
4185       // Add to full list
4186       list.insert(list.end(),hlp.begin(),hlp.end());
4187 
4188       // Free memory
4189       wrk[ith].free();
4190     }
4191   }
4192 
4193   // Sort the list
4194   std::sort(list.begin(),list.end());
4195 
4196   return list;
4197 }
4198 
compute_Nel(const arma::mat & P)4199 double DFTGrid::compute_Nel(const arma::mat & P) {
4200   double Nel=0.0;
4201 
4202 #ifdef _OPENMP
4203 #pragma omp parallel reduction(+:Nel)
4204 #endif
4205   {
4206 
4207 #ifdef _OPENMP
4208     int ith=omp_get_thread_num();
4209 #else
4210     int ith=0;
4211 #endif
4212 
4213 #ifdef _OPENMP
4214 #pragma omp for schedule(dynamic,1)
4215 #endif
4216     // Loop over integral grid
4217     for(size_t i=0;i<grids.size();i++) {
4218       // Change atom and create grid
4219       wrk[ith].set_grid(grids[i]);
4220       wrk[ith].form_grid();
4221       // Update density
4222       wrk[ith].update_density(P);
4223       // Integrate electrons
4224       Nel+=wrk[ith].compute_Nel();
4225       // Free memory
4226       wrk[ith].free();
4227     }
4228   }
4229 
4230   return Nel;
4231 }
4232 
compute_Nel(const arma::mat & Pa,const arma::mat & Pb)4233 double DFTGrid::compute_Nel(const arma::mat & Pa, const arma::mat & Pb) {
4234   double Nel=0.0;
4235 
4236 #ifdef _OPENMP
4237 #pragma omp parallel reduction(+:Nel)
4238 #endif
4239   {
4240 
4241 #ifdef _OPENMP
4242     int ith=omp_get_thread_num();
4243 #else
4244     int ith=0;
4245 #endif
4246 
4247 #ifdef _OPENMP
4248 #pragma omp for schedule(dynamic,1)
4249 #endif
4250     // Loop over integral grid
4251     for(size_t i=0;i<grids.size();i++) {
4252       // Change atom and create grid
4253       wrk[ith].set_grid(grids[i]);
4254       wrk[ith].form_grid();
4255       // Update density
4256       wrk[ith].update_density(Pa,Pb);
4257       // Integrate electrons
4258       Nel+=wrk[ith].compute_Nel();
4259       // Free memory
4260       wrk[ith].free();
4261     }
4262   }
4263 
4264   return Nel;
4265 }
4266 
compute_atomic_Nel(const arma::mat & P)4267 arma::vec DFTGrid::compute_atomic_Nel(const arma::mat & P) {
4268   arma::vec Nel(basp->get_Nnuc());
4269   Nel.zeros();
4270 
4271 #ifdef _OPENMP
4272 #pragma omp parallel
4273 #endif
4274   {
4275 
4276 #ifdef _OPENMP
4277     int ith=omp_get_thread_num();
4278 #else
4279     int ith=0;
4280 #endif
4281 
4282 #ifdef _OPENMP
4283 #pragma omp for schedule(dynamic,1)
4284 #endif
4285     // Loop over integral grid
4286     for(size_t i=0;i<grids.size();i++) {
4287       // Change atom and create grid
4288       wrk[ith].set_grid(grids[i]);
4289       wrk[ith].form_grid();
4290       // Update density
4291       wrk[ith].update_density(P);
4292 
4293       // Integrate electron density
4294       double dN(wrk[ith].compute_Nel());
4295 #ifdef _OPENMP
4296 #pragma omp critical
4297 #endif
4298       Nel(grids[i].atind)+=dN;
4299 
4300       // Free memory
4301       wrk[ith].free();
4302     }
4303   }
4304 
4305   return Nel;
4306 }
4307 
4308 
compute_atomic_Nel(const Hirshfeld & hirsh,const arma::mat & P)4309 arma::vec DFTGrid::compute_atomic_Nel(const Hirshfeld & hirsh, const arma::mat & P) {
4310   arma::vec Nel(basp->get_Nnuc());
4311   Nel.zeros();
4312 
4313 #ifdef _OPENMP
4314 #pragma omp parallel
4315 #endif
4316   {
4317 
4318 #ifdef _OPENMP
4319     int ith=omp_get_thread_num();
4320 #else
4321     int ith=0;
4322 #endif
4323 
4324 #ifdef _OPENMP
4325 #pragma omp for schedule(dynamic,1)
4326 #endif
4327     // Loop over integral grid
4328     for(size_t i=0;i<grids.size();i++) {
4329       // Change atom and create grid
4330       wrk[ith].set_grid(grids[i]);
4331       wrk[ith].form_hirshfeld_grid(hirsh);
4332       // Update density
4333       wrk[ith].update_density(P);
4334       // Integrate electrons
4335       double dN=wrk[ith].compute_Nel();
4336 #ifdef _OPENMP
4337 #pragma omp critical
4338 #endif
4339       Nel[grids[i].atind]+=dN;
4340       // Free memory
4341       wrk[ith].free();
4342     }
4343   }
4344 
4345   return Nel;
4346 }
4347 
eval_Fxc(int x_func,int c_func,const arma::mat & P,arma::mat & H,double & Excv,double & Nelv)4348 void DFTGrid::eval_Fxc(int x_func, int c_func, const arma::mat & P, arma::mat & H, double & Excv, double & Nelv) {
4349   // Clear Hamiltonian
4350   H.zeros(P.n_rows,P.n_cols);
4351   // Clear exchange-correlation energy
4352   double Ex=0.0, Ec=0.0;
4353   // Clear number of electrons
4354   double Nel=0.0;
4355 
4356 #ifdef _OPENMP
4357   // Get (maximum) number of threads
4358   int maxt=omp_get_max_threads();
4359 
4360   // Stack of work arrays
4361   std::vector<arma::mat> Hwrk;
4362 
4363   for(int i=0;i<maxt;i++) {
4364     Hwrk.push_back(arma::mat(H.n_rows,H.n_cols));
4365     Hwrk[i].zeros();
4366   }
4367 
4368 #pragma omp parallel shared(Hwrk) reduction(+:Nel,Ex,Ec)
4369 #endif
4370   { // Begin parallel region
4371 
4372 #ifdef _OPENMP
4373     // Current thread is
4374     int ith=omp_get_thread_num();
4375 #else
4376     int ith=0;
4377 #endif
4378 
4379     // Loop over atoms
4380 #ifdef _OPENMP
4381 #pragma omp for schedule(dynamic,1)
4382 #endif
4383     for(size_t i=0;i<grids.size();i++) {
4384       // Change atom and create grid
4385       wrk[ith].set_grid(grids[i]);
4386       wrk[ith].form_grid();
4387 
4388       // Update density
4389       Timer tp;
4390       wrk[ith].update_density(P);
4391       // Update number of electrons
4392       Nel+=wrk[ith].compute_Nel();
4393 
4394       // Initialize the arrays
4395       wrk[ith].init_xc();
4396       // Compute the functionals
4397       if(x_func>0) {
4398 	wrk[ith].compute_xc(x_func,true);
4399         wrk[ith].check_xc();
4400 	// Increment exchange energy
4401 	Ex+=wrk[ith].eval_Exc();
4402 	// Zero out array
4403 	wrk[ith].zero_Exc();
4404       }
4405       if(c_func>0) {
4406 	wrk[ith].compute_xc(c_func,true);
4407         wrk[ith].check_xc();
4408 	// Increment exchange energy
4409 	Ec+=wrk[ith].eval_Exc();
4410 	// Zero out array
4411 	wrk[ith].zero_Exc();
4412       }
4413 
4414       // and construct the Fock matrices
4415 
4416 #ifdef _OPENMP
4417       wrk[ith].eval_Fxc(Hwrk[ith]);
4418 #else
4419       wrk[ith].eval_Fxc(H);
4420 #endif
4421 
4422       // Free memory
4423       wrk[ith].free();
4424     }
4425   } // End parallel region
4426 
4427 #ifdef _OPENMP
4428   // Sum results
4429   for(int i=0;i<maxt;i++)
4430     H+=Hwrk[i];
4431 #endif
4432 
4433   //printf("\nExchange    energy % .10f\n",Ex);
4434   //printf("Correlation energy % .10f\n",Ec);
4435 
4436   Excv=Ex+Ec;
4437   Nelv=Nel;
4438 }
4439 
4440 
eval_Fxc(int x_func,int c_func,const arma::mat & Pa,const arma::mat & Pb,arma::mat & Ha,arma::mat & Hb,double & Excv,double & Nelv)4441 void DFTGrid::eval_Fxc(int x_func, int c_func, const arma::mat & Pa, const arma::mat & Pb, arma::mat & Ha, arma::mat & Hb, double & Excv, double & Nelv) {
4442   // Clear Hamiltonian
4443   Ha.zeros(Pa.n_rows,Pa.n_cols);
4444   Hb.zeros(Pb.n_rows,Pb.n_cols);
4445   // Clear exchange-correlation energy
4446   double Ex=0.0, Ec=0.0;
4447   // Clear number of electrons
4448   double Nel=0.0;
4449 
4450 #ifdef _OPENMP
4451   // Get (maximum) number of threads
4452   int maxt=omp_get_max_threads();
4453 
4454   // Stack of work arrays
4455   std::vector<arma::mat> Hawrk, Hbwrk;
4456 
4457   for(int i=0;i<maxt;i++) {
4458     Hawrk.push_back(arma::mat(Ha.n_rows,Ha.n_cols));
4459     Hawrk[i].zeros();
4460 
4461     Hbwrk.push_back(arma::mat(Hb.n_rows,Hb.n_cols));
4462     Hbwrk[i].zeros();
4463   }
4464 
4465 #pragma omp parallel shared(Hawrk,Hbwrk) reduction(+:Nel,Ex,Ec)
4466 #endif
4467   { // Begin parallel region
4468 
4469 #ifdef _OPENMP
4470     // Current thread is
4471     int ith=omp_get_thread_num();
4472 #else
4473     int ith=0;
4474 #endif
4475 
4476     // Loop over atoms
4477 #ifdef _OPENMP
4478 #pragma omp for schedule(dynamic,1)
4479 #endif
4480     for(size_t i=0;i<grids.size();i++) {
4481 
4482       // Change atom and create grid
4483       wrk[ith].set_grid(grids[i]);
4484       wrk[ith].form_grid();
4485 
4486       // Update density
4487       wrk[ith].update_density(Pa,Pb);
4488       // Update number of electrons
4489       Nel+=wrk[ith].compute_Nel();
4490 
4491       // Initialize the arrays
4492       wrk[ith].init_xc();
4493       // Compute the functionals
4494       if(x_func>0) {
4495 	wrk[ith].compute_xc(x_func,true);
4496         wrk[ith].check_xc();
4497 	// Evaluate the energy
4498 	Ex+=wrk[ith].eval_Exc();
4499 	wrk[ith].zero_Exc();
4500       }
4501       if(c_func>0) {
4502 	wrk[ith].compute_xc(c_func,true);
4503         wrk[ith].check_xc();
4504 	// Evaluate the energy
4505 	Ec+=wrk[ith].eval_Exc();
4506 	wrk[ith].zero_Exc();
4507       }
4508 
4509       // and construct the Fock matrices
4510 #ifdef _OPENMP
4511       wrk[ith].eval_Fxc(Hawrk[ith],Hbwrk[ith]);
4512 #else
4513       wrk[ith].eval_Fxc(Ha,Hb);
4514 #endif
4515 
4516       // Free memory
4517       wrk[ith].free();
4518     }
4519   } // End parallel region
4520 
4521 #ifdef _OPENMP
4522   // Sum results
4523   for(int i=0;i<maxt;i++) {
4524     Ha+=Hawrk[i];
4525     Hb+=Hbwrk[i];
4526   }
4527 #endif
4528 
4529   //printf("\nExchange    energy % .10f\n",Ex);
4530   //printf("Correlation energy % .10f\n",Ec);
4531 
4532   Excv=Ex+Ec;
4533   Nelv=Nel;
4534 }
4535 
eval_Fxc(int x_func,int c_func,const arma::cx_mat & CW,std::vector<arma::mat> & H,std::vector<double> & Exc,std::vector<double> & Nel,bool fock)4536 void DFTGrid::eval_Fxc(int x_func, int c_func, const arma::cx_mat & CW, std::vector<arma::mat> & H, std::vector<double> & Exc, std::vector<double> & Nel, bool fock) {
4537   size_t nocc=CW.n_cols;
4538 
4539   // Allocate memory
4540   if(fock) {
4541     H.resize(nocc);
4542     // Hamiltonians computed in full space
4543     for(size_t ip=0;ip<nocc;ip++)
4544       H[ip].zeros(CW.n_rows,CW.n_rows);
4545   }
4546 
4547   // Clear exchange-correlation energy
4548   Exc.assign(nocc,0.0);
4549   // Clear number of electrons
4550   Nel.assign(nocc,0.0);
4551 
4552 #ifdef _OPENMP
4553 #pragma omp parallel
4554 #endif
4555   {
4556     // Work matrix
4557     arma::mat Hwrk;
4558     // Dummy matrix
4559     arma::mat Hdum;
4560 
4561     if(fock)
4562       // Initialize memory
4563       Hwrk.zeros(CW.n_rows,CW.n_rows);
4564 
4565 #ifndef _OPENMP
4566     int ith=0;
4567 #else
4568     // Current thread is
4569     int ith=omp_get_thread_num();
4570 #endif
4571 
4572     // Loop over grids
4573 #ifdef _OPENMP
4574 #pragma omp for schedule(dynamic,1)
4575 #endif
4576     for(size_t i=0;i<grids.size();i++) {
4577 
4578       // Change atom and create grid
4579       wrk[ith].set_grid(grids[i]);
4580       wrk[ith].form_grid();
4581 
4582       // Loop over densities
4583       for(size_t ip=0;ip<nocc;ip++) {
4584 	// Update density
4585 	wrk[ith].update_density(CW.col(ip));
4586 	// Update number of electrons
4587 #ifdef _OPENMP
4588 #pragma omp atomic
4589 #endif
4590 	Nel[ip]+=wrk[ith].compute_Nel();
4591 
4592 	// Initialize the arrays
4593 	wrk[ith].init_xc();
4594 	// Compute the functionals
4595 	if(x_func>0)
4596 	  wrk[ith].compute_xc(x_func,fock);
4597 	if(c_func>0)
4598 	  wrk[ith].compute_xc(c_func,fock);
4599         wrk[ith].check_xc();
4600 
4601 	// Evaluate the energy
4602 #ifdef _OPENMP
4603 #pragma omp atomic
4604 #endif
4605 	Exc[ip]+=wrk[ith].eval_Exc();
4606 	// and construct the Fock matrices
4607 	if(fock) {
4608 	  Hwrk.zeros(); // need to clear this here
4609 
4610 	  wrk[ith].eval_Fxc(Hwrk,Hdum,false);
4611 
4612 #ifdef _OPENMP
4613 #pragma omp critical
4614 #endif
4615 	  H[ip]+=Hwrk;
4616 	}
4617       }
4618     }
4619 
4620     // Free memory
4621     wrk[ith].free();
4622 
4623   } // End parallel region
4624 }
4625 
eval_VV10(DFTGrid & nl,double b,double C,const arma::mat & P,arma::mat & H,double & Enl_,bool fock)4626 void DFTGrid::eval_VV10(DFTGrid & nl, double b, double C, const arma::mat & P, arma::mat & H, double & Enl_, bool fock) {
4627   // Reset energy
4628   double Enl=0.0;
4629 
4630   // Original gradient and laplacian settings
4631   bool grad, tau, lapl;
4632   wrk[0].get_grad_tau_lapl(grad,tau,lapl);
4633 
4634   // Collect nl grid data
4635   std::vector<arma::mat> nldata(nl.grids.size());
4636 #ifdef _OPENMP
4637 #pragma omp parallel
4638 #endif
4639   {
4640 #ifndef _OPENMP
4641     const int ith=0;
4642 #else
4643     // Current thread is
4644     const int ith=omp_get_thread_num();
4645 #pragma omp for schedule(dynamic,1)
4646 #endif
4647     for(size_t i=0;i<nl.grids.size();i++) {
4648       // Change atom
4649       wrk[ith].set_grid(nl.grids[i]);
4650       wrk[ith].set_grad_tau_lapl(true,false,false);
4651       // Create grid
4652       wrk[ith].form_grid();
4653 
4654       // Update density
4655       wrk[ith].update_density(P);
4656 
4657       // Collect the data
4658       wrk[ith].init_VV10(b,C,false);
4659       std::vector<size_t> idx;
4660       wrk[ith].collect_VV10(nldata[i],idx,b,C,true);
4661 
4662       wrk[ith].free();
4663     }
4664   } // End parallel region
4665 
4666   if(nl.verbose) {
4667     size_t n=0;
4668     for(size_t i=0;i<nldata.size();i++)
4669       n+=nldata[i].n_cols;
4670 
4671     printf("%i points ... ",(int) n);
4672     fflush(stdout);
4673   }
4674 
4675   /*
4676   for(size_t ii=0;ii<nldata.size();ii++)
4677     for(size_t i=0;i<nldata[ii].n_rows;i++) {
4678       for(size_t j=0;j<nldata[ii].n_cols;j++)
4679 	printf(" % e",nldata[ii](i,j));
4680       printf("\n");
4681     }
4682   */
4683 
4684   // Loop over grids
4685 #ifdef _OPENMP
4686 #pragma omp parallel reduction(+:Enl)
4687 #endif
4688   {
4689     arma::mat Hwrk(H);
4690     Hwrk.zeros();
4691 
4692 #ifndef _OPENMP
4693       const int ith=0;
4694 #else
4695       const int ith=omp_get_thread_num();
4696 #pragma omp for schedule(dynamic,1)
4697 #endif
4698     for(size_t i=0;i<grids.size();i++) {
4699       // Change atom
4700       wrk[ith].set_grid(grids[i]);
4701       wrk[ith].set_grad_tau_lapl(true,false,false);
4702       // Initialize worker
4703       wrk[ith].form_grid();
4704 
4705       // Update density
4706       wrk[ith].update_density(P);
4707       // Initialize the arrays
4708       wrk[ith].init_xc();
4709 
4710       // Calculate the integral over the nl grid
4711       wrk[ith].init_VV10(b,C,true);
4712       wrk[ith].compute_VV10(nldata,b,C);
4713 
4714       // Evaluate the energy
4715       Enl+=wrk[ith].eval_Exc();
4716       // and construct the Fock matrices
4717       if(fock)
4718 	wrk[ith].eval_Fxc(Hwrk);
4719 
4720       // Free memory
4721       wrk[ith].free();
4722     }
4723 
4724 #ifdef _OPENMP
4725 #pragma omp critical
4726 #endif
4727     if(fock)
4728       H+=Hwrk;
4729   }
4730 
4731   // Set return variable
4732   Enl_=Enl;
4733 
4734   for(size_t i=0;i<wrk.size();i++)
4735     wrk[i].set_grad_tau_lapl(grad,tau,lapl);
4736 }
4737 
eval_force(int x_func,int c_func,const arma::mat & P)4738 arma::vec DFTGrid::eval_force(int x_func, int c_func, const arma::mat & P) {
4739   arma::vec f(3*basp->get_Nnuc());
4740   f.zeros();
4741 
4742 #ifdef _OPENMP
4743 #pragma omp parallel
4744 #endif
4745   { // Begin parallel region
4746 
4747 #ifndef _OPENMP
4748     const int ith=0;
4749 #else
4750     // Current thread is
4751     const int ith=omp_get_thread_num();
4752 
4753     // Helper
4754     arma::vec fwrk(f);
4755 
4756 #pragma omp for schedule(dynamic,1)
4757 #endif
4758     // Loop over atoms
4759     for(size_t iat=0;iat<grids.size();iat++) {
4760       bool grad, tau, lapl;
4761       wrk[ith].get_grad_tau_lapl(grad,tau,lapl);
4762       // We need gradients for the LDA terms and Laplacian terms for the GGA terms (Pv_i really)
4763       wrk[ith].set_grad_tau_lapl(true,grad,grad);
4764       // Need bf Hessian for GGA and laplacian gradient for MGGA
4765       wrk[ith].set_hess_lgrad(grad,lapl);
4766 
4767       // Change atom and create grid
4768       wrk[ith].set_grid(grids[iat]);
4769       wrk[ith].form_grid();
4770 
4771       // Update density
4772       wrk[ith].update_density(P);
4773 
4774       // Initialize the arrays
4775       wrk[ith].init_xc();
4776       // Compute the functionals
4777       if(x_func>0)
4778 	wrk[ith].compute_xc(x_func,true);
4779       if(c_func>0)
4780 	wrk[ith].compute_xc(c_func,true);
4781       wrk[ith].check_xc();
4782 
4783       // Calculate the force on the atom
4784 #ifdef _OPENMP
4785       fwrk+=wrk[ith].eval_force_r();
4786 #else
4787       f+=wrk[ith].eval_force_r();
4788 #endif
4789 
4790       // Free memory
4791       wrk[ith].free();
4792     }
4793 
4794 #ifdef _OPENMP
4795 #pragma omp critical
4796     f+=fwrk;
4797 #endif
4798   } // End parallel region
4799 
4800   return f;
4801 }
4802 
eval_force(int x_func,int c_func,const arma::mat & Pa,const arma::mat & Pb)4803 arma::vec DFTGrid::eval_force(int x_func, int c_func, const arma::mat & Pa, const arma::mat & Pb) {
4804   arma::vec f(3*basp->get_Nnuc());
4805   f.zeros();
4806 
4807 #ifdef _OPENMP
4808 #pragma omp parallel
4809 #endif
4810   { // Begin parallel region
4811 
4812 #ifndef _OPENMP
4813     const int ith=0;
4814 #else
4815     // Current thread is
4816     const int ith=omp_get_thread_num();
4817 
4818     // Helper
4819     arma::vec fwrk(f);
4820 
4821 #pragma omp for schedule(dynamic,1)
4822 #endif
4823     // Loop over atoms
4824     for(size_t iat=0;iat<grids.size();iat++) {
4825       // We need gradients for the LDA terms
4826       bool grad, tau, lapl;
4827       wrk[ith].get_grad_tau_lapl(grad,tau,lapl);
4828       wrk[ith].set_grad_tau_lapl(true,grad,grad);
4829       // Need bf Hessian for GGA and laplacian gradient for MGGA
4830       wrk[ith].set_hess_lgrad(grad,lapl);
4831 
4832       // Change atom and create grid
4833       wrk[ith].set_grid(grids[iat]);
4834       wrk[ith].form_grid();
4835 
4836       // Update density
4837       wrk[ith].update_density(Pa,Pb);
4838 
4839       // Initialize the arrays
4840       wrk[ith].init_xc();
4841       // Compute the functionals
4842       if(x_func>0)
4843 	wrk[ith].compute_xc(x_func,true);
4844       if(c_func>0)
4845 	wrk[ith].compute_xc(c_func,true);
4846       wrk[ith].check_xc();
4847 
4848       // Calculate the force on the atom
4849 #ifdef _OPENMP
4850       fwrk+=wrk[ith].eval_force_u();
4851 #else
4852       f+=wrk[ith].eval_force_u();
4853 #endif
4854 
4855       // Free memory
4856       wrk[ith].free();
4857     }
4858 
4859 #ifdef _OPENMP
4860 #pragma omp critical
4861     f+=fwrk;
4862 #endif
4863   } // End parallel region
4864 
4865   return f;
4866 }
4867 
eval_VV10_force(DFTGrid & nl,double b,double C,const arma::mat & P)4868 arma::vec DFTGrid::eval_VV10_force(DFTGrid & nl, double b, double C, const arma::mat & P) {
4869   // Forces on atoms
4870   arma::vec f(3*basp->get_Nnuc());
4871   f.zeros();
4872 
4873   // Original gradient and laplacian settings
4874   bool grad, tau, lapl;
4875   wrk[0].get_grad_tau_lapl(grad,tau,lapl);
4876 
4877   // Collect nl grid data
4878   std::vector<arma::mat> nldata(nl.grids.size());
4879 #ifdef _OPENMP
4880 #pragma omp parallel
4881 #endif
4882   {
4883 #ifndef _OPENMP
4884     const int ith=0;
4885 #else
4886     // Current thread is
4887     const int ith=omp_get_thread_num();
4888 #pragma omp for schedule(dynamic,1)
4889 #endif
4890     for(size_t i=0;i<nl.grids.size();i++) {
4891       // Need gradient for VV10 but no laplacian
4892       wrk[ith].set_grad_tau_lapl(true,false,false);
4893       // No need for Hessian or laplacian of gradient
4894       wrk[ith].set_hess_lgrad(false,false);
4895       // Change atom and create grid
4896       wrk[ith].set_grid(nl.grids[i]);
4897       wrk[ith].form_grid();
4898 
4899       // Update density
4900       wrk[ith].update_density(P);
4901 
4902       // Collect the data
4903       wrk[ith].init_VV10(b,C,false);
4904       std::vector<size_t> idx;
4905       wrk[ith].collect_VV10(nldata[i],idx,b,C,true);
4906 
4907       // Free memory
4908       wrk[ith].free();
4909 
4910     } // End nl grid loop
4911   } // End parallel region
4912 
4913 
4914   // Loop over grids
4915 #ifdef _OPENMP
4916 #pragma omp parallel
4917 #endif
4918   {
4919     arma::vec fwrk(f);
4920     fwrk.zeros();
4921 
4922 #ifndef _OPENMP
4923     const int ith=0;
4924 #else
4925     const int ith=omp_get_thread_num();
4926 #pragma omp for schedule(dynamic,1)
4927 #endif
4928     for(size_t i=0;i<grids.size();i++) {
4929       // Need gradient for VV10 and laplacian for VV10 gradient
4930       wrk[ith].set_grad_tau_lapl(true,true,true);
4931       // Need hessian for VV10 gradient
4932       wrk[ith].set_hess_lgrad(true,false);
4933       // Change atom and create grid
4934       wrk[ith].set_grid(grids[i]);
4935       wrk[ith].form_grid();
4936 
4937       // Update density
4938       Timer tp;
4939       wrk[ith].update_density(P);
4940       // Initialize the arrays
4941       wrk[ith].init_xc();
4942 
4943       // Calculate kappa and omega and plug in constant energy density
4944       wrk[ith].init_VV10(b,C,true);
4945       // Evaluate the VV10 energy and potential and get the grid contribution
4946       fwrk.subvec(3*grids[i].atind,3*grids[i].atind+2)+=wrk[ith].compute_VV10_F(nldata,nl.grids,b,C);
4947       // and now evaluate the forces on the atoms
4948       fwrk+=wrk[ith].eval_force_r();
4949 
4950       // Free memory
4951       wrk[ith].free();
4952     }
4953 
4954 #ifdef _OPENMP
4955 #pragma omp critical
4956 #endif
4957     f+=fwrk;
4958   }
4959 
4960   for(size_t i=0;i<wrk.size();i++)
4961     wrk[i].set_grad_tau_lapl(grad,tau,lapl);
4962 
4963   return f;
4964 }
4965 
print_density(const arma::mat & P,std::string densname)4966 void DFTGrid::print_density(const arma::mat & P, std::string densname) {
4967   // Open output files
4968   FILE *dens=fopen(densname.c_str(),"w");
4969 
4970   fprintf(dens,"%i\n",(int) get_Npoints());
4971 
4972   Timer t;
4973   if(verbose) {
4974     printf("\nSaving density data in %s ... ",densname.c_str());
4975     fflush(stdout);
4976   }
4977 
4978 #ifdef _OPENMP
4979 #pragma omp parallel
4980 #endif
4981   { // Begin parallel region
4982 
4983 #ifdef _OPENMP
4984     // Current thread is
4985     const int ith=omp_get_thread_num();
4986 #pragma omp for schedule(dynamic,1)
4987 #else
4988     const int ith=0;
4989 #endif
4990     // Loop over atoms
4991     for(size_t i=0;i<grids.size();i++) {
4992       // Change atom and create grid
4993       wrk[ith].set_grid(grids[i]);
4994       wrk[ith].form_grid();
4995 
4996       // Update density
4997       wrk[ith].update_density(P);
4998 
4999       // Write out density and potential data
5000 #ifdef _OPENMP
5001 #pragma omp critical
5002 #endif
5003       {
5004 	wrk[ith].print_density(dens);
5005       }
5006 
5007       // Free memory
5008       wrk[ith].free();
5009     }
5010   } // End parallel region
5011 
5012     // Close output files
5013   fclose(dens);
5014 
5015   printf("done (%s)\n",t.elapsed().c_str());
5016 }
5017 
print_density(const arma::mat & Pa,const arma::mat & Pb,std::string densname)5018 void DFTGrid::print_density(const arma::mat & Pa, const arma::mat & Pb, std::string densname) {
5019   // Open output files
5020   FILE *dens=fopen(densname.c_str(),"w");
5021 
5022   fprintf(dens,"%i\n",(int) get_Npoints());
5023 
5024   Timer t;
5025   if(verbose) {
5026     printf("\nSaving density data in %s ... ",densname.c_str());
5027     fflush(stdout);
5028   }
5029 
5030 #ifdef _OPENMP
5031 #pragma omp parallel
5032 #endif
5033   { // Begin parallel region
5034 
5035 #ifdef _OPENMP
5036     // Current thread is
5037     const int ith=omp_get_thread_num();
5038 #pragma omp for schedule(dynamic,1)
5039 #else
5040     const int ith=0;
5041 #endif
5042     // Loop over atoms
5043     for(size_t i=0;i<grids.size();i++) {
5044       // Change atom and create grid
5045       wrk[ith].set_grid(grids[i]);
5046       wrk[ith].form_grid();
5047 
5048       // Update density
5049       wrk[ith].update_density(Pa,Pb);
5050 
5051       // Write out density and potential data
5052 #ifdef _OPENMP
5053 #pragma omp critical
5054 #endif
5055       {
5056 	wrk[ith].print_density(dens);
5057       }
5058 
5059       // Free memory
5060       wrk[ith].free();
5061     }
5062   } // End parallel region
5063 
5064     // Close output files
5065   fclose(dens);
5066 
5067   printf("done (%s)\n",t.elapsed().c_str());
5068 }
5069 
print_potential(int func_id,const arma::mat & Pa,const arma::mat & Pb,std::string potname)5070 void DFTGrid::print_potential(int func_id, const arma::mat & Pa, const arma::mat & Pb, std::string potname) {
5071   // Open output files
5072   FILE *pot=fopen(potname.c_str(),"w");
5073   fprintf(pot,"%i\n",(int) get_Npoints());
5074 
5075   Timer t;
5076   if(verbose) {
5077     printf("\nSaving potential data in %s ... ",potname.c_str());
5078     fflush(stdout);
5079   }
5080 
5081 #ifdef _OPENMP
5082 #pragma omp parallel
5083 #endif
5084   { // Begin parallel region
5085 
5086 #ifdef _OPENMP
5087     // Current thread is
5088     const int ith=omp_get_thread_num();
5089 #pragma omp for schedule(dynamic,1)
5090 #else
5091     const int ith=0;
5092 #endif
5093     // Loop over atoms
5094     for(size_t i=0;i<grids.size();i++) {
5095       // Change atom and create grid
5096       wrk[ith].set_grid(grids[i]);
5097       wrk[ith].form_grid();
5098 
5099       // Update density
5100       wrk[ith].update_density(Pa,Pb);
5101 
5102       // Initialize the arrays
5103       wrk[ith].init_xc();
5104       // Compute the functionals
5105       if(func_id>0)
5106 	wrk[ith].compute_xc(func_id,true);
5107       wrk[ith].check_xc();
5108 
5109       // Write out density and potential data
5110 #ifdef _OPENMP
5111 #pragma omp critical
5112 #endif
5113       {
5114 	wrk[ith].print_potential(func_id,pot);
5115       }
5116 
5117       // Free memory
5118       wrk[ith].free();
5119     }
5120   } // End parallel region
5121 
5122     // Close output files
5123   fclose(pot);
5124 
5125   printf("done (%s)\n",t.elapsed().c_str());
5126 }
5127 
check_potential(int func_id,const arma::mat & Pa,const arma::mat & Pb,std::string potname)5128 void DFTGrid::check_potential(int func_id, const arma::mat & Pa, const arma::mat & Pb, std::string potname) {
5129   // Open output files
5130   FILE *pot=fopen(potname.c_str(),"w");
5131 
5132   Timer t;
5133   if(verbose) {
5134     printf("\nRunning potential check. Saving output to %s ... ",potname.c_str());
5135     fflush(stdout);
5136   }
5137 
5138   fprintf(pot,"%23s %23s %23s %23s %23s %23s %23s %23s %23s\n","rhoa","rhob","sigmaaa","sigmaab","sigmabb","lapla","laplb","taua","taub");
5139   fprintf(pot,"%23s %23s %23s %23s %23s %23s %23s %23s %23s %23s\n","exc","vrhoa","vrhob","vsigmaaa","vsigmaab","vsigmabb","vlapla","vlaplb","vtaua","vtaub");
5140 
5141 #ifdef _OPENMP
5142 #pragma omp parallel
5143 #endif
5144   { // Begin parallel region
5145 
5146 #ifdef _OPENMP
5147     // Current thread is
5148     const int ith=omp_get_thread_num();
5149 #pragma omp for schedule(dynamic,1)
5150 #else
5151     const int ith=0;
5152 #endif
5153     // Loop over atoms
5154     for(size_t i=0;i<grids.size();i++) {
5155       // Change atom and create grid
5156       wrk[ith].set_grid(grids[i]);
5157       wrk[ith].form_grid();
5158 
5159       // Update density
5160       wrk[ith].update_density(Pa,Pb);
5161 
5162       // Initialize the arrays
5163       wrk[ith].init_xc();
5164       // Compute the functionals
5165       if(func_id>0)
5166 	wrk[ith].compute_xc(func_id,true);
5167       wrk[ith].check_xc();
5168 
5169       // Write out density and potential data
5170 #ifdef _OPENMP
5171 #pragma omp critical
5172 #endif
5173       {
5174 	wrk[ith].check_potential(pot);
5175       }
5176 
5177       // Free memory
5178       wrk[ith].free();
5179     }
5180   } // End parallel region
5181 
5182   // Close output files
5183   fclose(pot);
5184 
5185   printf("done (%s)\n",t.elapsed().c_str());
5186 }
5187 
print_grid(std::string met) const5188 void DFTGrid::print_grid(std::string met) const {
5189   // Amount of integration points
5190   arma::uvec np(basp->get_Nnuc());
5191   np.zeros();
5192   // Amount of function values
5193   arma::uvec nf(basp->get_Nnuc());
5194   nf.zeros();
5195 
5196   for(size_t i=0;i<grids.size();i++) {
5197     np(grids[i].atind)+=grids[i].np;
5198     nf(grids[i].atind)+=grids[i].nfunc;
5199   }
5200   /*
5201   printf("Grid info:\n");
5202   for(size_t i=0;i<grids.size();i++)
5203     printf("%6i %4i %8.3e %e %2i %7i %10i\n",(int) i, (int) grids[i].atind, grids[i].R, grids[i].w, grids[i].l, (int) grids[i].np, (int) grids[i].nfunc);
5204   */
5205 
5206   printf("Composition of %s grid:\n %7s %7s %10s\n",met.c_str(),"atom","Npoints","Nfuncs");
5207   for(size_t i=0;i<basp->get_Nnuc();i++)
5208     printf(" %4i %-2s %7i %10i\n",(int) i+1, basp->get_symbol(i).c_str(), (int) np(i), (int) nf(i));
5209 }
5210 
density_threshold(const arma::mat & P,double thr)5211 double DFTGrid::density_threshold(const arma::mat & P, double thr) {
5212   // Get the list of orbital density values
5213   std::vector<dens_list_t> list=eval_dens_list(P);
5214 
5215   // Get cutoff
5216   double itg=0.0;
5217   size_t idx=0;
5218   while(itg<thr && idx<list.size()) {
5219     // Increment integral
5220     itg+=list[idx].d*list[idx].w;
5221     // Increment index
5222     idx++;
5223   }
5224 
5225   // Cutoff is thus between idx and idx-1.
5226   return (list[idx].d + list[idx-1].d)/2.0;
5227 }
5228 
eval_SAP()5229 arma::mat DFTGrid::eval_SAP() {
5230   // Amount of basis functions
5231   size_t N=basp->get_Nbf();
5232 
5233   // Returned matrices
5234   arma::mat V(N,N);
5235   V.zeros();
5236 
5237   // SAPs
5238   SAP sap;
5239 
5240   // Change atom and create grid
5241 #ifdef _OPENMP
5242 #pragma omp parallel
5243 #endif
5244   { // Begin parallel region
5245 
5246 #ifdef _OPENMP
5247     int ith=omp_get_thread_num();
5248 #else
5249     int ith=0;
5250 #endif
5251 
5252 #ifdef _OPENMP
5253     arma::mat Vwrk(N,N);
5254     Vwrk.zeros();
5255 #pragma omp for schedule(dynamic,1)
5256 #endif
5257     for(size_t i=0;i<grids.size();i++) {
5258       wrk[ith].set_grid(grids[i]);
5259       wrk[ith].form_grid();
5260       // Evaluate overlap
5261 #ifdef _OPENMP
5262       wrk[ith].eval_SAP(sap,Vwrk);
5263 #else
5264       wrk[ith].eval_SAP(sap,V);
5265 #endif
5266       // Free memory
5267       wrk[ith].free();
5268     }
5269 
5270 #ifdef _OPENMP
5271 #pragma omp critical
5272     V+=Vwrk;
5273 #endif
5274   }
5275 
5276   return V;
5277 }
5278