1 /*-------------------------------------------------------------------
2 Copyright 2012 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #include <electronic/ExCorr.h>
21 #include <electronic/Basis.h>
22 #include <electronic/Everything.h>
23 #include <electronic/ExactExchange.h>
24 #include <electronic/ExCorr_internal.h>
25 #include <electronic/ExCorr_internal_LDA.h>
26 #include <electronic/ExCorr_internal_GGA.h>
27 #include <electronic/ExCorr_internal_mGGA.h>
28 #include <electronic/ExCorr_OrbitalDep_GLLBsc.h>
29 #include <core/Thread.h>
30 #include <core/GpuUtil.h>
31 #include <core/VectorField.h>
32 
33 //---------------- Subset wrapper for MPI parallelization --------------------
34 
evaluateSub(int iStart,int iStop,std::vector<const double * > n,std::vector<const double * > sigma,std::vector<const double * > lap,std::vector<const double * > tau,double * E,std::vector<double * > E_n,std::vector<double * > E_sigma,std::vector<double * > E_lap,std::vector<double * > E_tau) const35 void Functional::evaluateSub(int iStart, int iStop,
36 	std::vector<const double*> n, std::vector<const double*> sigma,
37 	std::vector<const double*> lap, std::vector<const double*> tau,
38 	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
39 	std::vector<double*> E_lap, std::vector<double*> E_tau) const
40 {
41 	struct Offset
42 	{	int iStart;
43 		std::vector<const double*> operator()(std::vector<const double*> v) { auto vOut=v; for(auto& p: vOut) if(p) p+=iStart; return vOut; }
44 		std::vector<double*> operator()(std::vector<double*> v) { auto vOut=v; for(auto& p: vOut) if(p) p+=iStart; return vOut; }
45 	}
46 	offset;
47 	offset.iStart = iStart;
48 	evaluate(iStop-iStart, offset(n), offset(sigma), offset(lap), offset(tau), E ? E+iStart : 0, offset(E_n), offset(E_sigma), offset(E_lap), offset(E_tau));
49 }
50 
//---------------- Spin-density-matrix transformations for noncollinear magnetism --------------------
52 
//! Transform noncollinear quantities x to the basis that diagonalizes the spin-density
//! matrix n, at each of N grid points (CPU threaded launcher for spinDiagonalize_calc)
void spinDiagonalize(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<double*> xDiag)
{	threadedLoop(spinDiagonalize_calc, N, n, x, xDiag);
}
//! Propagate gradients w.r.t. the diagonalized quantities (E_xDiag) back to gradients
//! w.r.t. the spin-density matrix n and original quantities x (accumulated into E_n, E_x)
void spinDiagonalizeGrad(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<const double*> E_xDiag, std::vector<double*> E_n, std::vector<double*> E_x)
{	threadedLoop(spinDiagonalizeGrad_calc, N, n, x, E_xDiag, E_n, E_x);
}
#ifdef GPU_ENABLED
//GPU counterparts of the above (declarations only; presumably defined in the GPU source file)
void spinDiagonalize_gpu(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<double*> xDiag);
void spinDiagonalizeGrad_gpu(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<const double*> E_xDiag, std::vector<double*> E_n, std::vector<double*> E_x);
#endif
63 
64 //---------------- LDA thread launcher / gpu switch --------------------
65 
FunctionalLDA(LDA_Variant variant,double scaleFac)66 FunctionalLDA::FunctionalLDA(LDA_Variant variant, double scaleFac) : Functional(scaleFac), variant(variant)
67 {	switch(variant)
68 	{	case LDA_X_Slater:  logPrintf("Initalized Slater LDA exchange.\n"); break;
69 		case LDA_C_PZ:      logPrintf("Initalized Perdew-Zunger LDA correlation.\n"); break;
70 		case LDA_C_PW:      logPrintf("Initalized Perdew-Wang LDA correlation.\n"); break;
71 		case LDA_C_PW_prec: logPrintf("Initalized Perdew-Wang LDA correlation (extended precision).\n"); break;
72 		case LDA_C_VWN:     logPrintf("Initalized Vosko-Wilk-Nusair LDA correlation.\n"); break;
73 		case LDA_XC_Teter:  logPrintf("Initalized Teter93 LSD exchange+correlation.\n"); break;
74 		case LDA_KE_TF:     logPrintf("Initalized Thomas-Fermi LDA kinetic energy.\n"); break;
75 	}
76 }
77 
//! Compile-time-dispatched LDA evaluator: run the per-point kernel
//! LDA_calc<variant,nCount>::compute over N grid points on CPU threads
template<LDA_Variant variant, int nCount>
void LDA(int N, array<const double*,nCount> n, double* E, array<double*,nCount> E_n, double scaleFac)
{	threadedLoop(LDA_calc<variant,nCount>::compute, N, n, E, E_n, scaleFac);
}
//! Runtime dispatch: select the LDA template instantiation matching the
//! requested variant and spin-channel count (n.size())
void LDA(LDA_Variant variant, int N, std::vector<const double*> n, double* E, std::vector<double*> E_n, double scaleFac)
{	SwitchTemplate_spin(SwitchTemplate_LDA, variant, n.size(), LDA, (N, n, E, E_n, scaleFac) )
}
#ifdef GPU_ENABLED
//GPU counterpart (declaration only; presumably defined in the GPU source file)
void LDA_gpu(LDA_Variant variant, int N, std::vector<const double*> n, double* E, std::vector<double*> E_n, double scaleFac);
#endif
88 
//! Evaluate the LDA at N points: only the densities n are used; the energy density
//! is accumulated into E and gradients into E_n (when non-null)
void FunctionalLDA::evaluate(int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{	//LDA: so ignore sigma, lap, tau and call the above functions (CPU/GPU as appropriate)
	assert(n.size()==1 || n.size()==2);
	callPref(LDA)(variant, N, n, E, E_n, scaleFac);
}
97 
98 //---------------- GGA thread launcher / gpu switch --------------------
99 
FunctionalGGA(GGA_Variant variant,double scaleFac)100 FunctionalGGA::FunctionalGGA(GGA_Variant variant, double scaleFac) : Functional(scaleFac), variant(variant)
101 {	switch(variant)
102 	{	case GGA_X_PBE: logPrintf("Initalized PBE GGA exchange.\n"); break;
103 		case GGA_C_PBE: logPrintf("Initalized PBE GGA correlation.\n"); break;
104 		case GGA_X_PBEsol: logPrintf("Initalized PBEsol GGA exchange.\n"); break;
105 		case GGA_C_PBEsol: logPrintf("Initalized PBEsol GGA correlation.\n"); break;
106 		case GGA_X_PW91: logPrintf("Initalized PW91 GGA exchange.\n"); break;
107 		case GGA_C_PW91: logPrintf("Initalized PW91 GGA correlation.\n"); break;
108 		case GGA_X_wPBE_SR: logPrintf("Initalized omega-PBE short-ranged GGA exchange.\n"); break;
109 		case GGA_X_GLLBsc: logPrintf("Initalized GLLB-sc GGA exchange potential.\n"); break;
110 		case GGA_X_LB94: logPrintf("Initalized LB94 GGA exchange potential correction.\n"); break;
111 		case GGA_KE_VW: logPrintf("Initialized von Weisacker kinetic energy gradient correction.\n"); break;
112 		case GGA_KE_PW91: logPrintf("Initialized PW91 GGA kinetic energy.\n"); break;
113 	}
114 }
115 
//! Compile-time-dispatched GGA evaluator: run the per-point kernel
//! GGA_calc<variant,spinScaling,nCount>::compute over N grid points on CPU threads.
//! sigma carries 2*nCount-1 contracted density-gradient components.
template<GGA_Variant variant, bool spinScaling, int nCount>
void GGA(int N, array<const double*,nCount> n, array<const double*,2*nCount-1> sigma,
	double* E, array<double*,nCount> E_n, array<double*,2*nCount-1> E_sigma, double scaleFac)
{	threadedLoop(GGA_calc<variant,spinScaling,nCount>::compute, N, n, sigma, E, E_n, E_sigma, scaleFac);
}
//! Runtime dispatch: select the GGA template instantiation matching the
//! requested variant and spin-channel count (n.size())
void GGA(GGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma, double scaleFac)
{	SwitchTemplate_spin(SwitchTemplate_GGA, variant, n.size(), GGA, (N, n, sigma, E, E_n, E_sigma, scaleFac) )
}
#ifdef GPU_ENABLED
//GPU counterpart (declaration only; presumably defined in the GPU source file)
void GGA_gpu(GGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma, double scaleFac);
#endif
129 
//! Evaluate the GGA at N points: uses densities n and gradient contractions sigma;
//! energy density accumulated into E, gradients into E_n and E_sigma (when non-null)
void FunctionalGGA::evaluate(int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{	//GGA: so ignore lap, tau and call the above functions (CPU/GPU as appropriate)
	assert(n.size()==1 || n.size()==2);
	callPref(GGA)(variant, N, n, sigma, E, E_n, E_sigma, scaleFac);
}
138 
139 
140 //---------------- metaGGA thread launcher / gpu switch --------------------
141 
FunctionalMGGA(mGGA_Variant variant,double scaleFac)142 FunctionalMGGA::FunctionalMGGA(mGGA_Variant variant, double scaleFac) : Functional(scaleFac), variant(variant)
143 {	switch(variant)
144 	{	case mGGA_X_TPSS: logPrintf("Initalized TPSS mGGA exchange.\n"); break;
145 		case mGGA_C_TPSS: logPrintf("Initalized TPSS mGGA correlation.\n"); break;
146 		case mGGA_X_revTPSS: logPrintf("Initalized revTPSS mGGA exchange.\n"); break;
147 		case mGGA_C_revTPSS: logPrintf("Initalized revTPSS mGGA correlation.\n"); break;
148 	}
149 }
150 
//! Compile-time-dispatched meta-GGA evaluator: run the per-point kernel
//! mGGA_calc<variant,spinScaling,nCount>::compute over N grid points on CPU threads.
//! In addition to n and sigma, uses density laplacians (lap) and KE densities (tau).
template<mGGA_Variant variant, bool spinScaling, int nCount>
void mGGA(int N, array<const double*,nCount> n, array<const double*,2*nCount-1> sigma,
	array<const double*,nCount> lap, array<const double*,nCount> tau,
	double* E, array<double*,nCount> E_n, array<double*,2*nCount-1> E_sigma,
	array<double*,nCount> E_lap, array<double*,nCount> E_tau, double scaleFac)
{	threadedLoop(mGGA_calc<variant,spinScaling,nCount>::compute, N,
		n, sigma, lap, tau, E, E_n, E_sigma, E_lap, E_tau, scaleFac);
}
//! Runtime dispatch: select the mGGA template instantiation matching the
//! requested variant and spin-channel count (n.size())
void mGGA(mGGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau, double scaleFac)
{	SwitchTemplate_spin(SwitchTemplate_mGGA, variant, n.size(), mGGA, (N,
		n, sigma, lap, tau, E, E_n, E_sigma, E_lap, E_tau, scaleFac) )
}
#ifdef GPU_ENABLED
//GPU counterpart (declaration only; presumably defined in the GPU source file)
void mGGA_gpu(mGGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau, double scaleFac);
#endif
172 
//! Evaluate the meta-GGA at N points: uses all inputs (n, sigma, lap, tau);
//! energy density accumulated into E, gradients into E_n/E_sigma/E_lap/E_tau (when non-null)
void FunctionalMGGA::evaluate(int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{	//mGGA: all inputs are used; call the above functions (CPU/GPU as appropriate)
	//(previous comment claiming lap, tau are ignored was a stale copy-paste from the GGA case)
	assert(n.size()==1 || n.size()==2);
	callPref(mGGA)(variant, N, n, sigma, lap, tau, E, E_n, E_sigma, E_lap, E_tau, scaleFac);
}
181 
182 
183 
184 //---------------------------- LibXC wrapper -----------------------------
185 #ifdef LIBXC_ENABLED
186 #include <xc.h>
187 
188 #if XC_MAJOR_VERSION >= 4
189 //Provide wrapper emulating libxc v3's interface to get XC reference:
//! Emulate the LibXC v3 reference-lookup interface on top of the v4+ API,
//! which returns a structure containing the reference string instead of the string itself
char const* xc_func_info_get_ref(const xc_func_info_type *info, int number)
{	return xc_func_info_get_references(info, number)->ref;
}
193 #endif
194 
//! LibXC wrapper that provides an interface similar to that of the internal functionals
//! with minor differences to handle the different data order and conventions used by LibXC
class FunctionalLibXC
{	xc_func_type funcUnpolarized, funcPolarized; //LibXC handles for the spin-unpolarized and spin-polarized versions of the same functional
public:
	bool needsSigma() const { return funcUnpolarized.info->family != XC_FAMILY_LDA; } //!< gradients needed for GGA and HYB_GGA
	bool needsLap() const { return funcUnpolarized.info->family == XC_FAMILY_MGGA; } //!< MGGAs may need laplacian
	bool needsTau() const { return funcUnpolarized.info->family == XC_FAMILY_MGGA; } //!< MGGAs need KE density
	bool hasExchange() const { return funcUnpolarized.info->kind == XC_EXCHANGE || funcUnpolarized.info->kind == XC_EXCHANGE_CORRELATION; } //!< whether functional includes exchange
	bool hasCorrelation() const { return funcUnpolarized.info->kind == XC_CORRELATION || funcUnpolarized.info->kind == XC_EXCHANGE_CORRELATION; } //!< whether functional includes correlation
	bool hasKinetic() const { return funcUnpolarized.info->kind == XC_KINETIC; } //!< whether functional is a kinetic-energy functional
	bool hasEnergy() const { return true; } //!< LibXC functionals always provide an energy
	double exxScale() const { return funcUnpolarized.cam_omega>0 ? funcUnpolarized.cam_beta : funcUnpolarized.cam_alpha; } //!< exact-exchange fraction (cam_beta when range-separated, cam_alpha otherwise)
	double exxOmega() const { return funcUnpolarized.cam_omega; } //!< range-separation parameter for screened exact exchange (0 if unscreened)
	
	//! Initialize both spin versions of the LibXC functional with identifier xcCode.
	//! typeName (e.g. "exchange") is used only in log/error messages and citations.
	FunctionalLibXC(int xcCode, const char* typeName)
	{	if(xc_func_init(&funcUnpolarized, xcCode, XC_UNPOLARIZED) != 0)
			die("Error initializing LibXC unpolarized %s functional\n", typeName);
		if(xc_func_init(&funcPolarized, xcCode, XC_POLARIZED) != 0)
			die("Error initializing LibXC polarized %s functional\n", typeName);
		
		logPrintf("Initialized LibXC %s functional '%s'\n", typeName, funcUnpolarized.info->name);
		
		Citations::add("LibXC library of exchange-correlation functions",
			"M. A. L. Marques, M. J. T. Oliveira and T. Burnus, Comput. Phys. Commun. 183, 2272 (2012)");
		Citations::add(
			funcUnpolarized.info->name + string(" ") + typeName + string(" functional"),
				xc_func_info_get_ref(funcUnpolarized.info,0) );
	}
	~FunctionalLibXC()
	{	xc_func_end(&funcUnpolarized);
		xc_func_end(&funcPolarized);
	}
	
	//! Like Functional::evaluate, except different spin components are stored together
	//! (interleaved per grid point) and the computed energy is per-particle (e)
	//! instead of per volume (E). Results are accumulated onto the outputs.
	void evaluate(int nCount, int N,
		const double* n, const double* sigma, const double* lap, const double* tau,
		double* e, double* E_n, double* E_sigma, double* E_lap, double* E_tau) const
	{
		assert(nCount==1 || nCount==2);
		const xc_func_type& func = (nCount==1) ? funcUnpolarized : funcPolarized;
		int sigmaCount = 2*nCount-1; //1 for unpolarized, 3 for polarized
		int Nn = N * nCount;
		int Nsigma = N * sigmaCount;
		//Allocate temporaries (zero length when the corresponding output is not requested):
		std::vector<double> eTemp(N);
		std::vector<double> E_nTemp(E_n ? Nn : 0);
		std::vector<double> E_sigmaTemp(E_n && needsSigma() ? Nsigma : 0);
		std::vector<double> E_lapTemp(E_n && needsLap() ? Nn : 0);
		std::vector<double> E_tauTemp(E_n && needsTau() ? Nn : 0);
		//Invoke appropriate LibXC function in scratch space:
		if(needsTau())
		{	//Project out problematic mGGA points (not handled correctly by LibXC >= 4):
			//NOTE(review): this writes through const pointers n and tau (cast away below),
			//i.e. it modifies the caller's input arrays in place — confirm callers expect this.
			#if XC_MAJOR_VERSION >= 4
			for(int i=0; i<N; i++)
			{	double* nPtr = (double*)(n + i*nCount);
				double* tauPtr = (double*)(tau + i*nCount);
				double nTot = nCount==1 ? *nPtr : (*nPtr + *(nPtr+1));
				double tauTot = nCount==1 ? *tauPtr : (*tauPtr + *(tauPtr+1));
				double sigmaTot = nCount==1 ? sigma[i] : sigma[3*i]+2*sigma[3*i+1]+sigma[3*i+2];
				bool zOffRange = 0.125*sigmaTot > (nTot * tauTot);
				if(tauTot < tauCutoff) //Small tau: zero out density and KE density at this point
				{	if(nCount==1)
					{	*nPtr = 0.;
						*tauPtr = 0.;
					}
					else
					{	*nPtr = *(nPtr+1) = 0.;
						*tauPtr = *(tauPtr+1) = 0.;
					}
				}
				else if(zOffRange && nTot>nCutoff) //tauVW/tau ratio out of range: clamp tau
				{	double tauTotNew = 0.125*sigmaTot/nTot; //von-Weizsacker value
					if(nCount==1)
						*tauPtr = tauTotNew;
					else
					{	double tauScale = tauTotNew/tauTot; //rescale both spin channels by the same factor
						*tauPtr *= tauScale;
						*(tauPtr+1) *= tauScale;
					}
				}
			}
			#endif
			if(E_n) xc_mgga_exc_vxc(&func, N, n, sigma, lap, tau,
				&eTemp[0], &E_nTemp[0], &E_sigmaTemp[0], &E_lapTemp[0], &E_tauTemp[0]);
			else xc_mgga_exc(&func, N, n, sigma, lap, tau, &eTemp[0]);
		}
		else if(needsSigma())
		{	if(E_n) xc_gga_exc_vxc(&func, N, n, sigma, &eTemp[0], &E_nTemp[0], &E_sigmaTemp[0]); //need gradient
			else xc_gga_exc(&func, N, n, sigma, &eTemp[0]);
		}
		else
		{	if(E_n) xc_lda_exc_vxc(&func, N, n, &eTemp[0], &E_nTemp[0]); //need gradient
			else xc_lda_exc(&func, N, n, &eTemp[0]);
		}
		//Accumulate onto final results
		eblas_daxpy(N, 1., &eTemp[0], 1, e, 1);
		if(E_nTemp.size()) eblas_daxpy(Nn, 1., &E_nTemp[0], 1, E_n, 1);
		if(E_sigmaTemp.size()) eblas_daxpy(Nsigma, 1., &E_sigmaTemp[0], 1, E_sigma, 1);
		if(E_lapTemp.size()) eblas_daxpy(Nn, 1., &E_lapTemp[0], 1, E_lap, 1);
		if(E_tauTemp.size()) eblas_daxpy(Nn, 1., &E_tauTemp[0], 1, E_tau, 1);
	}
	
	//! Thread worker: evaluate the [iStart,iStop) slice of the grid by offsetting
	//! all non-null pointers appropriately (iOffset is the global offset of this launch)
	static void evaluate_thread(int iStart, int iStop, const FunctionalLibXC* func, int iOffset,
		int nCount, const double* n, const double* sigma, const double* lap, const double* tau,
		double* e, double* E_n, double* E_sigma, double* E_lap, double* E_tau)
	{
		int offs_e = (iOffset+iStart); //offset into per-point arrays
		int offs_n = offs_e * nCount; //offset into interleaved spin-density-like arrays
		int offs_sigma = offs_e * (2*nCount-1); //offset into interleaved sigma-like arrays
		int N = iStop-iStart; if(!N) return;
		#define OFFSET(ptr,offset) ((ptr) ? ((ptr)+(offset)) : 0)
		func->evaluate(nCount, N, OFFSET(n,offs_n), OFFSET(sigma,offs_sigma), OFFSET(lap,offs_n), OFFSET(tau,offs_n),
			OFFSET(e,offs_e), OFFSET(E_n,offs_n), OFFSET(E_sigma,offs_sigma), OFFSET(E_lap,offs_n), OFFSET(E_tau,offs_n) );
		#undef OFFSET
	}
	
	//! Evaluate the [iStart,iStop) sub-range of the grid, parallelized over CPU threads
	void evaluateSub(int nCount, int iStart, int iStop,
		const double* n, const double* sigma, const double* lap, const double* tau,
		double* e, double* E_n, double* E_sigma, double* E_lap, double* E_tau) const
	{
		int N = iStop-iStart; if(!N) return;
		threadLaunch(FunctionalLibXC::evaluate_thread, N, this, iStart,
			nCount, n, sigma, lap, tau, e, E_n, E_sigma, E_lap, E_tau);
	}
};
322 
323 //! Convert a collection of scalar fields into an interleaved vector field.
324 //! result can be freed using delete[]
transpose(const ScalarFieldArray & inVec)325 template<unsigned M> double* transpose(const ScalarFieldArray& inVec)
326 {	assert(inVec.size()==M);
327 	const unsigned N = inVec[0]->nElem;
328 	const double* in[M]; for(unsigned m=0; m<M; m++) in[m] = inVec[m]->data();
329 	double *out = new double[M*N], *outPtr = out;
330 	for(unsigned n=0; n<N; n++)
331 		for(unsigned m=0; m<M; m++)
332 			*(outPtr++) = in[m][n];
333 	return out;
334 }
335 
336 //! Convert an interleaved vector field to a collection of scalar fields
transpose(double * in,ScalarFieldArray & outVec)337 template<unsigned M> void transpose(double* in, ScalarFieldArray& outVec)
338 {	assert(outVec.size()==M);
339 	const unsigned N = outVec[0]->nElem;
340 	double* out[M]; for(unsigned m=0; m<M; m++) out[m] = outVec[m]->data();
341 	double *inPtr = in;
342 	for(unsigned n=0; n<N; n++)
343 		for(unsigned m=0; m<M; m++)
344 			out[m][n] = *(inPtr++);
345 }
346 
347 #endif //LIBXC_ENABLED
348 
349 //----------------------- Functional List --------------------
350 
//! Container for the individual functional components making up the overall
//! exchange-correlation (and optionally kinetic) functional
struct FunctionalList
{	std::vector<std::shared_ptr<Functional> > internal; //!<Functionals with an internal implementation
	//! Add an internal LDA functional of the given variant, optionally scaled by scaleFac
	void add(LDA_Variant variant, double scaleFac=1.0)
	{	internal.push_back(std::make_shared<FunctionalLDA>(variant, scaleFac));
	}
	//! Add an internal GGA functional of the given variant, optionally scaled by scaleFac
	void add(GGA_Variant variant, double scaleFac=1.0)
	{	internal.push_back(std::make_shared<FunctionalGGA>(variant, scaleFac));
	}
	//! Add an internal meta-GGA functional of the given variant, optionally scaled by scaleFac
	void add(mGGA_Variant variant, double scaleFac=1.0)
	{	internal.push_back(std::make_shared<FunctionalMGGA>(variant, scaleFac));
	}
	
	#ifdef LIBXC_ENABLED
	std::vector<std::shared_ptr<FunctionalLibXC> > libXC; //!<Functionals which use LibXC for evaluation
	//! Add a LibXC functional with identifier xcCode; name is used for log messages
	void add(int xcCode, const char* name)
	{	libXC.push_back(std::make_shared<FunctionalLibXC>(xcCode, name));
	}
	#endif
};
370 
371 
372 //-------------- ExCorr members ----------------
373 
374 extern EnumStringMap<ExCorrType> exCorrTypeMap;
375 
//! Construct with the selected exchange-correlation and kinetic functional types.
//! Exact-exchange parameters start at zero; the actual functional objects are
//! created later in setup().
ExCorr::ExCorr(ExCorrType exCorrType, KineticType kineticType) : exCorrType(exCorrType), kineticType(kineticType), xcName(exCorrTypeMap.getString(exCorrType)),
exxScale(0.), exxOmega(0.), exxScaleOverride(0.), exxOmegaOverride(0.),
functionals(std::make_shared<FunctionalList>())
#ifdef LIBXC_ENABLED
, xcExchange(0), xcCorr(0), xcExcorr(0), xcKinetic(0)
#endif
{
}
384 
setup(const Everything & everything)385 void ExCorr::setup(const Everything& everything)
386 {	e = &everything;
387 
388 	string citeReason = xcName + " exchange-correlation functional";
389 
390 	switch(exCorrType)
391 	{
392 		#ifdef LIBXC_ENABLED
393 		case ExCorrLibXC:
394 		{	if(xcExcorr)
395 			{	functionals->add(xcExcorr, "exchange-correlation");
396 				exxScale = functionals->libXC.back()->exxScale(); //set exact exchange factor
397 				exxOmega = functionals->libXC.back()->exxOmega(); //set exact exchange screening parameter
398 			}
399 			else
400 			{	functionals->add(xcExchange, "exchange");
401 				exxScale = functionals->libXC.back()->exxScale(); //set exact exchange factor
402 				exxOmega = functionals->libXC.back()->exxOmega(); //set exact exchange screening parameter
403 				functionals->add(xcCorr, "correlation");
404 			}
405 			break;
406 		}
407 		if(exxScaleOverride) die("Exact-exchange scale factor cannot be overriden for LibXC functionals.\n")
408 		if(exxOmegaOverride) die("Exact-exchange screening parameter cannot be overriden for LibXC functionals.\n")
409 		#endif //LIBXC_ENABLED
410 
411 		case ExCorrLDA_PZ:
412 			functionals->add(LDA_X_Slater);
413 			functionals->add(LDA_C_PZ);
414 			Citations::add(citeReason, "J.P. Perdew and A. Zunger, Phys. Rev. B 23, 5048 (1981)");
415 			break;
416 		case ExCorrLDA_PW:
417 			functionals->add(LDA_X_Slater);
418 			functionals->add(LDA_C_PW);
419 			Citations::add(citeReason, "J.P. Perdew and Y. Wang, Phys. Rev. B 45, 13244 (1992)");
420 			break;
421 		case ExCorrLDA_PW_prec:
422 			functionals->add(LDA_X_Slater);
423 			functionals->add(LDA_C_PW_prec);
424 			Citations::add(citeReason, "J.P. Perdew and Y. Wang, Phys. Rev. B 45, 13244 (1992)");
425 			break;
426 		case ExCorrLDA_VWN:
427 			functionals->add(LDA_X_Slater);
428 			functionals->add(LDA_C_VWN);
429 			Citations::add(citeReason, "S.H. Vosko, L. Wilk and M. Nusair, Can. J. Phys. 58, 1200 (1980)");
430 			break;
431 		case ExCorrLDA_Teter:
432 			functionals->add(LDA_XC_Teter);
433 			Citations::add(citeReason, "S. Goedecker, M. Teter and J. Hutter, Phys. Rev. B 54, 1703 (1996)");
434 			break;
435 		case ExCorrGGA_PBE:
436 			functionals->add(GGA_X_PBE);
437 			functionals->add(GGA_C_PBE);
438 			Citations::add(citeReason, "J.P. Perdew, K. Burke and M. Ernzerhof, Phys. Rev. Lett. 77, 3865 (1996)");
439 			break;
440 		case ExCorrGGA_PBEsol:
441 			functionals->add(GGA_X_PBEsol);
442 			functionals->add(GGA_C_PBEsol);
443 			Citations::add(citeReason, "J.P. Perdew et al., Phys. Rev. Lett. 100, 136406 (2008)");
444 			break;
445 		case ExCorrGGA_PW91:
446 			functionals->add(GGA_X_PW91);
447 			functionals->add(GGA_C_PW91);
448 			Citations::add(citeReason, "J.P. Perdew et al., Phys. Rev. B 46, 6671 (1992)");
449 			break;
450 		case ExCorrMGGA_TPSS:
451 			functionals->add(mGGA_X_TPSS);
452 			functionals->add(mGGA_C_TPSS);
453 			Citations::add(citeReason, "J. Tao, J.P. Perdew, V.N. Staroverov and G. Scuseria, Phys. Rev. Lett. 91, 146401 (2003)");
454 			break;
455 		case ExCorrMGGA_revTPSS:
456 			functionals->add(mGGA_X_revTPSS);
457 			functionals->add(mGGA_C_revTPSS);
458 			Citations::add(citeReason, "J.P. Perdew et al., Phys. Rev. Lett. 103, 026403 (2009)");
459 			break;
460 		case ExCorrORB_GLLBsc:
461 			if(e->eInfo.spinType == SpinVector)
462 				die("GLLLB-sc functional not implemented for noncollinear spin-polarized calculations.\n");
463 			functionals->add(GGA_X_GLLBsc);
464 			orbitalDep = std::make_shared<ExCorr_OrbitalDep_GLLBsc>(*e);
465 			functionals->add(GGA_C_PBEsol);
466 			Citations::add(citeReason, "M. Kuisma, J. Ojanen, J. Enkovaara and T. T. Rantala, Phys. Rev. B 82, 115106 (2010)");
467 			break;
468 		case ExCorrPOT_LB94:
469 			functionals->add(LDA_X_Slater);
470 			functionals->add(LDA_C_PZ);
471 			functionals->add(GGA_X_LB94);
472 			Citations::add(citeReason, "R. van Leeuwen and E. J. Baerends, Phys. Rev. A 49, 2421 (1994)");
473 			break;
474 		case ExCorrHYB_PBE0:
475 			exxScale = exxScaleOverride ? exxScaleOverride : 1./4;
476 			functionals->add(GGA_X_PBE, 1.-exxScale);
477 			functionals->add(GGA_C_PBE);
478 			Citations::add(citeReason, "M. Ernzerhof and G. E. Scuseria, J. Chem. Phys. 110, 5029 (1999)");
479 			break;
480 		case ExCorrHYB_HSE06:
481 			exxOmega = exxOmegaOverride ? exxOmegaOverride : 0.11;
482 			exxScale = exxScaleOverride ? exxScaleOverride : 1./4;
483 			functionals->add(GGA_X_wPBE_SR, -exxScale);
484 			functionals->add(GGA_X_PBE);
485 			functionals->add(GGA_C_PBE);
486 			Citations::add(citeReason, "A.V. Krukau, O.A. Vydrov, A.F. Izmaylov and G.E. Scuseria, J. Chem. Phys. 125, 224106 (2006)");
487 			break;
488 		case ExCorrHYB_HSE12:
489 			exxOmega = exxOmegaOverride ? exxOmegaOverride : 0.185;
490 			exxScale = exxScaleOverride ? exxScaleOverride : 0.313;
491 			functionals->add(GGA_X_wPBE_SR, -exxScale);
492 			functionals->add(GGA_X_PBE);
493 			functionals->add(GGA_C_PBE);
494 			Citations::add(citeReason, "J.E. Moussa, P.A. Schultz and J.R. Chelikowsky, J. Chem. Phys. 136, 204117 (2012)");
495 			break;
496 		case ExCorrHYB_HSE12s:
497 			exxOmega = exxOmegaOverride ? exxOmegaOverride : 0.408;
498 			exxScale = exxScaleOverride ? exxScaleOverride : 0.425;
499 			functionals->add(GGA_X_wPBE_SR, -exxScale);
500 			functionals->add(GGA_X_PBE);
501 			functionals->add(GGA_C_PBE);
502 			Citations::add(citeReason, "J.E. Moussa, P.A. Schultz and J.R. Chelikowsky, J. Chem. Phys. 136, 204117 (2012)");
503 			break;
504 		case ExCorrHF:
505 			exxScale = 1.;
506 			break;
507 	}
508 
509 	if(exxScaleOverride && exxScale!=exxScaleOverride)
510 		die("Exact-exchange scale factor override is only allowed for hybrid functionals.\n")
511 	if(exxOmegaOverride && exxOmega!=exxOmegaOverride)
512 		die("Exact-exchange screening parameter override is only allowed for screened hybrid functionals.\n")
513 
514 	if(exxScale)
515 	{	logPrintf("Will include %lg x ", exxScale);
516 		if(!exxOmega) logPrintf("exact exchange.\n");
517 		else logPrintf("screened exact exchange with range-parameter %lg\n", exxOmega);
518 	}
519 
520 	switch(kineticType)
521 	{	case KineticNone:
522 			break;
523 		case KineticTF:
524 			functionals->add(LDA_KE_TF);
525 			Citations::add("Thomas-Fermi kinetic energy functional",
526 				"L.H. Thomas, Proc. Cambridge Phil. Soc. 23, 542 (1927)\n"
527 				"E. Fermi, Rend. Accad. Naz. Lincei 6, 602 (1927)");
528 			break;
529 		case KineticVW:
530 			functionals->add(LDA_KE_TF);
531 			functionals->add(GGA_KE_VW,1.);
532 			Citations::add("Thomas-Fermi-von-Weisacker kinetic energy functional",
533 				"C.F.v. Weizsacker, Z. Phys. 96, 431 (1935)");
534 			break;
535 		case KineticPW91:
536 			functionals->add(GGA_KE_PW91);
537 			Citations::add("PW91K kinetic energy functional",
538 				"A. Lembarki and H. Chermette, Phys. Rev. A 50, 5328 (1994)");
539 			break;
540 		#ifdef LIBXC_ENABLED
541 		case KineticLibXC:
542 			functionals->add(xcKinetic, "kinetic");
543 			break;
544 		#endif //LIBXC_ENABLED
545 	}
546 }
547 
548 
//! Return the name (command string) of the selected exchange-correlation functional
string ExCorr::getName() const
{	return xcName;
}
552 
//! Return the scale factor for exact exchange (0 for non-hybrid functionals)
double ExCorr::exxFactor() const
{	return exxScale;
}
556 
//! Return the screening parameter for exact exchange (0 for unscreened)
double ExCorr::exxRange() const
{	return exxOmega;
}
560 
needsKEdensity() const561 bool ExCorr::needsKEdensity() const
562 {	for(auto func: functionals->internal)
563 		if(func->needsTau())
564 			return true;
565 	#ifdef LIBXC_ENABLED
566 	for(auto func: functionals->libXC)
567 		if(func->needsTau())
568 			return true;
569 	#endif
570 	return false;
571 }
572 
hasEnergy() const573 bool ExCorr::hasEnergy() const
574 {	for(auto func: functionals->internal)
575 		if(!func->hasEnergy())
576 			return false;
577 	#ifdef LIBXC_ENABLED
578 	for(auto func: functionals->libXC)
579 		if(!func->hasEnergy())
580 			return false;
581 	#endif
582 	return true;
583 }
584 
585 //! Extract a std::vector of data pointers from a VectorField array, along a specific Cartesian direction
dataPref(std::vector<ScalarFieldMultiplet<T,3>> & x,int iDir)586 template<typename T> std::vector<typename T::DataType*> dataPref(std::vector<ScalarFieldMultiplet<T,3> >& x, int iDir)
587 {	std::vector<typename T::DataType*> xData(x.size());
588 	for(unsigned s=0; s<x.size(); s++)
589 		xData[s] = x[s][iDir] ? x[s][iDir]->dataPref() : 0;
590 	return xData;
591 }
592 
593 //! Extract a std::vector of const data pointers from a VectorField array, along a specific Cartesian direction
constDataPref(const std::vector<ScalarFieldMultiplet<T,3>> & x,int iDir)594 template<typename T> std::vector<const typename T::DataType*> constDataPref(const std::vector<ScalarFieldMultiplet<T,3> >& x, int iDir)
595 {	std::vector<const typename T::DataType*> xData(x.size());
596 	for(unsigned s=0; s<x.size(); s++)
597 		xData[s] = x[s][iDir] ? x[s][iDir]->dataPref() : 0;
598 	return xData;
599 }
600 
601 //Return whether a functional should be included.
602 //Die if the masks are incompatible: eg. requesting X only from a combined XC functional
shouldInclude(const std::shared_ptr<Func> & functional,const IncludeTXC & includeTXC)603 template<typename Func> bool shouldInclude(const std::shared_ptr<Func>& functional, const IncludeTXC& includeTXC)
604 {	bool T = functional->hasKinetic();
605 	bool X = functional->hasExchange();
606 	bool C = functional->hasCorrelation();
607 	bool hasNeeded = (includeTXC.T && T) || (includeTXC.X && X) || (includeTXC.C && C);
608 	bool hasUnneeded = (!includeTXC.T && T) || (!includeTXC.X && X) || (!includeTXC.C && C);
609 	if(hasNeeded && hasUnneeded)
610 	{	string combination, spacer;
611 		if(T) { combination += spacer + " kinetic"; spacer="-"; }
612 		if(X) { combination += spacer + " exchange"; spacer="-"; }
613 		if(C) { combination += spacer + " correlation"; spacer="-"; }
614 		die("ExCorr cannot evaluate only some parts of combined %s functional.\n", combination.c_str());
615 	}
616 	return hasNeeded;
617 }
618 
operator ()(const ScalarFieldArray & n,ScalarFieldArray * Vxc,IncludeTXC includeTXC,const ScalarFieldArray * tauPtr,ScalarFieldArray * Vtau,matrix3<> * Exc_RRT) const619 double ExCorr::operator()(const ScalarFieldArray& n, ScalarFieldArray* Vxc, IncludeTXC includeTXC,
620 		const ScalarFieldArray* tauPtr, ScalarFieldArray* Vtau, matrix3<>* Exc_RRT) const
621 {
622 	static StopWatch watch("ExCorrTotal"), watchComm("ExCorrCommunication"), watchFunc("ExCorrFunctional");
623 	watch.start();
624 
625 	const int nInCount = n.size(); assert(nInCount==1 || nInCount==2 || nInCount==4);
626 	const int nCount = std::min(nInCount, 2); //Number of spin-densities used in the parametrization of the functional
627 	const int sigmaCount = 2*nCount-1;
628 	const GridInfo& gInfo = n[0]->gInfo;
629 
630 	//------- Prepare inputs, allocate outputs -------
631 
632 	//Energy density per volume:
633 	ScalarField E; nullToZero(E, gInfo);
634 
635 	//Gradient w.r.t spin densities:
636 	bool needGradients = (Vxc or Exc_RRT); //need gradient propagation (for Vxc/Vtau, or for Exc_RRT)
637 	if(Vxc) Vxc->clear();
638 	ScalarFieldArray E_n(nCount);
639 	if(needGradients) nullToZero(E_n, gInfo);
640 
641 	//Check for GGAs and meta GGAs:
642 	bool needsSigma = false, needsTau=false, needsLap=false;
643 	for(auto func: functionals->internal)
644 		if(shouldInclude(func, includeTXC))
645 		{	needsSigma |= func->needsSigma();
646 			needsLap |= func->needsLap();
647 			needsTau |= func->needsTau();
648 		}
649 	#ifdef LIBXC_ENABLED
650 	for(auto func: functionals->libXC)
651 		if(shouldInclude(func, includeTXC))
652 		{	needsSigma |= func->needsSigma();
653 			needsLap |= func->needsLap();
654 			needsTau |= func->needsTau();
655 		}
656 	#endif
657 
658 	//Calculate spatial gradients for GGA (if needed)
659 	std::vector<VectorField> Dn(nInCount);
660 	int iDirStart, iDirStop;
661 	TaskDivision(3, mpiWorld).myRange(iDirStart, iDirStop);
662 	if(needsSigma)
663 	{	//Compute the gradients of the (spin-)densities:
664 		for(int s=0; s<nInCount; s++)
665 		{	const ScalarFieldTilde Jn = J(n[s]);
666 			for(int i=iDirStart; i<iDirStop; i++)
667 				Dn[s][i] = I(D(Jn,i));
668 		}
669 	}
670 
671 	//Additional inputs/outputs for MGGAs (Laplacian, orbital KE and gradients w.r.t those)
672 	ScalarFieldArray lap(nInCount), E_lap(nCount);
673 	if(needsLap)
674 	{	//Compute laplacian
675 		for(int s=0; s<nInCount; s++)
676 			lap[s] = (1./gInfo.detR) * I(L(J(n[s])));
677 		//Allocate gradient w.r.t laplacian if required
678 		if(needGradients) nullToZero(E_lap, gInfo);
679 	}
680 	ScalarFieldArray tau(nInCount), E_tau(nCount);
681 	if(needsTau)
682 	{	//make sure orbital KE density has been provided
683 		assert(tauPtr);
684 		tau = *tauPtr;
685 		//allocate gradients w.r.t KE density if required
686 		if(Vxc)
687 		{	assert(Vtau); //if computing gradients, all gradients must be computed
688 			Vtau->clear();
689 		}
690 		if(needGradients) nullToZero(E_tau, gInfo);
691 	}
692 
693 	//Transform to local spin-diagonal basis (noncollinear magnetism mode only)
694 	ScalarFieldArray nCapped(nCount), lapIn(nCount), tauIn(nCount);
695 	std::vector<VectorField> DnIn(nCount);
696 	if(nCount != nInCount)
697 	{	assert(nCount==2); assert(nInCount==4);
698 		std::swap(DnIn, Dn);
699 		std::swap(lapIn, lap);
700 		std::swap(tauIn, tau);
701 		//Density:
702 		nullToZero(nCapped, gInfo);
703 		callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(n),  dataPref(nCapped));
704 		//Spatial gradients:
705 		if(needsSigma)
706 		{	for(int i=iDirStart; i<iDirStop; i++)
707 			{	for(int s=0; s<nCount; s++) nullToZero(Dn[s][i], gInfo);
708 				callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(DnIn,i), dataPref(Dn,i));
709 			}
710 		}
711 		//Laplacian:
712 		if(needsLap)
713 		{	nullToZero(lap, gInfo);
714 			callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(lapIn),  dataPref(lap));
715 		}
716 		//KE density:
717 		if(needsTau)
718 		{	nullToZero(tau, gInfo);
719 			callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(tauIn),  dataPref(tau));
720 		}
721 	}
722 
723 	//Cap negative densities to 0:
724 	if(!nCapped[0]) nCapped = clone(n);
725 	double nMin, nMax;
726 	for(int s=0; s<nCount; s++)
727 		callPref(eblas_capMinMax)(gInfo.nr, nCapped[s]->dataPref(), nMin, nMax, 0.);
728 
729 	//Compute the required contractions for GGA:
730 	ScalarFieldArray sigma(sigmaCount), E_sigma(sigmaCount);
731 	if(needsSigma)
732 	{	for(int s1=0; s1<nCount; s1++)
733 			for(int s2=s1; s2<nCount; s2++)
734 			{	for(int i=iDirStart; i<iDirStop; i++)
735 					sigma[s1+s2] += Dn[s1][i] * Dn[s2][i];
736 				watchComm.start();
737 				nullToZero(sigma[s1+s2], gInfo);
738 				sigma[s1+s2]->allReduceData(mpiWorld, MPIUtil::ReduceSum);
739 				watchComm.stop();
740 			}
741 		//Allocate gradient if required:
742 		if(needGradients) nullToZero(E_sigma, gInfo, sigmaCount);
743 	}
744 
745 	#ifdef LIBXC_ENABLED
746 	//------------------ Evaluate LibXC functionals ---------------
747 	if(functionals->libXC.size())
748 	{	//Prepare input/output data on the CPU in transposed order (spins contiguous)
749 		double *eData = E->data(), *nData=0, *sigmaData=0, *lapData=0, *tauData=0;
750 		double *E_nData=0, *E_sigmaData=0, *E_lapData=0, *E_tauData=0;
751 		if(nCount == 1)
752 		{	nData = nCapped[0]->data();
753 			if(needsSigma) sigmaData = sigma[0]->data();
754 			if(needsLap) lapData = lap[0]->data();
755 			if(needsTau) tauData = tau[0]->data();
756 			if(needGradients)
757 			{	E_nData = E_n[0]->data();
758 				if(needsSigma) E_sigmaData = E_sigma[0]->data();
759 				if(needsLap) E_lapData = E_lap[0]->data();
760 				if(needsTau) E_tauData = E_tau[0]->data();
761 			}
762 		}
763 		else //Need to interleave input spin-vector fields:
764 		{	nData = transpose<2>(nCapped);
765 			if(needsSigma) sigmaData = transpose<3>(sigma);
766 			if(needsLap) lapData = transpose<2>(lap);
767 			if(needsTau) tauData = transpose<2>(tau);
768 			if(needGradients)
769 			{	E_nData = new double[2*gInfo.nr]; eblas_zero(2*gInfo.nr, E_nData);
770 				if(needsSigma) { E_sigmaData = new double[3*gInfo.nr]; eblas_zero(3*gInfo.nr, E_sigmaData); }
771 				if(needsLap) { E_lapData = new double[2*gInfo.nr]; eblas_zero(2*gInfo.nr, E_lapData); }
772 				if(needsTau) { E_tauData = new double[2*gInfo.nr]; eblas_zero(2*gInfo.nr, E_tauData); }
773 			}
774 		}
775 
776 		//Calculate all the required functionals:
777 		watchFunc.start();
778 		for(auto func: functionals->libXC)
779 			if(shouldInclude(func, includeTXC))
780 				func->evaluateSub(nCount, gInfo.irStart, gInfo.irStop, nData, sigmaData, lapData, tauData,
781 					eData, E_nData, E_sigmaData, E_lapData, E_tauData);
782 		watchFunc.stop();
783 
784 		//Uninterleave spin-vector field results:
785 		if(nCount != 1)
786 		{	delete[] nData;
787 			if(needsSigma) delete[] sigmaData;
788 			if(needsLap) delete[] lapData;
789 			if(needsTau) delete[] tauData;
790 			if(needGradients)
791 			{	transpose<2>(E_nData, E_n); delete[] E_nData;
792 				if(needsSigma) { transpose<3>(E_sigmaData, E_sigma); delete[] E_sigmaData; }
793 				if(needsLap) { transpose<2>(E_lapData, E_lap); delete[] E_lapData; }
794 				if(needsTau) { transpose<2>(E_tauData, E_tau); delete[] E_tauData; }
795 			}
796 		}
797 
798 		//Convert per-particle energy to energy density per volume
799 		E = E * (nCount==1 ? nCapped[0] : nCapped[0]+nCapped[1]);
800 	}
801 	#endif //LIBXC_ENABLED
802 
803 	//---------------- Compute internal functionals ----------------
804 	watchFunc.start();
805 	for(auto func: functionals->internal)
806 		if(shouldInclude(func, includeTXC))
807 			func->evaluateSub(gInfo.irStart, gInfo.irStop,
808 				constDataPref(nCapped), constDataPref(sigma), constDataPref(lap), constDataPref(tau),
809 				E->dataPref(), dataPref(E_n), dataPref(E_sigma), dataPref(E_lap), dataPref(E_tau));
810 	watchFunc.stop();
811 
812 	//Cleanup unneeded derived quantities (free memory before starting communications and gradient propagation)
813 	double Exc = integral(E); E = 0; //note Exc accumulated over processes below in communication block
814 	nCapped.clear();
815 	sigma.clear();
816 	lap.clear();
817 	tau.clear();
818 
819 	//---------------- Collect results over processes ----------------
820 	watchComm.start();
821 	mpiWorld->allReduce(Exc, MPIUtil::ReduceSum);
822 	for(ScalarField& x: E_n) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
823 	for(ScalarField& x: E_sigma) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
824 	for(ScalarField& x: E_lap) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
825 	for(ScalarField& x: E_tau) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
826 	watchComm.stop();
827 
828 	//--------------- Gradient propagation ---------------------
829 	if(needGradients)
830 	{	//Change gradients from diagonal to spin-density-matrix if necessary
831 		if(nCount != nInCount)
832 		{	//Density:
833 			{	ScalarFieldArray E_nIn(nInCount); nullToZero(E_nIn, gInfo);
834 				callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(n), constDataPref(E_n), dataPref(E_nIn), dataPref(E_nIn));
835 				std::swap(E_nIn, E_n);
836 			}
837 			//KE density:
838 			if(needsTau)
839 			{	ScalarFieldArray E_tauIn(nInCount); nullToZero(E_tauIn, gInfo);
840 				callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(tauIn), constDataPref(E_tau), dataPref(E_n), dataPref(E_tauIn));
841 				std::swap(E_tauIn, E_tau);
842 			}
843 			else E_tau.resize(nInCount);
844 			//Laplacian:
845 			if(needsLap)
846 			{	ScalarFieldArray E_lapIn(nInCount); nullToZero(E_lapIn, gInfo);
847 				callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(lapIn), constDataPref(E_lap), dataPref(E_n), dataPref(E_lapIn));
848 				std::swap(E_lapIn, E_lap);
849 				lapIn.clear();
850 			}
851 		}
852 
853 		//Propagate Laplacian contribution to density
854 		if(needsLap)
855 		{	for(int s=0; s<nInCount; s++)
856 			{	E_n[s] += Jdag((1./gInfo.detR) * L(Idag(E_lap[s])));
857 				if(Exc_RRT) *Exc_RRT += Lstress(J(n[s]), J(E_lap[s]));
858 			}
859 			E_lap.clear();
860 		}
861 
862 		//Propagate spatial gradient contribution to density
863 		if(needsSigma)
864 		{	ScalarFieldTildeArray E_nTilde(nInCount); //contribution to the potential in fourier space
865 			matrix3<> Esigma_RRT; //stress contribution through gradients
866 			std::vector<VectorField> DnAll(nInCount); //gradients of all density components required for stress calculation
867 			if(Exc_RRT)
868 			{	for(int s=0; s<nInCount; s++)
869 				{	const ScalarFieldTilde Jn = J(n[s]);
870 					for(int i=0; i<3; i++)
871 						DnAll[s][i] = (i>=iDirStart and i<iDirStop and nInCount==nCount) ? Dn[s][i] : I(D(Jn,i));
872 				}
873 			}
874 			for(int i=iDirStart; i<iDirStop; i++)
875 			{	//Propagate from contraction sigma to the spatial derivatives
876 				ScalarFieldArray E_Dni(nCount);
877 				for(int s1=0; s1<nCount; s1++)
878 					for(int s2=s1; s2<nCount; s2++)
879 					{	if(s1==s2) E_Dni[s1] += 2*(E_sigma[s1+s2] * Dn[s1][i]);
880 						else
881 						{	E_Dni[s1] += E_sigma[s1+s2] * Dn[s2][i];
882 							E_Dni[s2] += E_sigma[s1+s2] * Dn[s1][i];
883 						}
884 					}
885 				//Convert from diagonal to spin-density-matrix if necessary
886 				if(nCount != nInCount)
887 				{	ScalarFieldArray E_DniIn(nInCount); nullToZero(E_DniIn, gInfo);
888 					callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(DnIn,i), constDataPref(E_Dni), dataPref(E_n), dataPref(E_DniIn));
889 					std::swap(E_DniIn, E_Dni);
890 				}
891 				//Propagate to E_nTilde:
892 				for(int s=0; s<nInCount; s++)
893 					E_nTilde[s] -= D(Idag(E_Dni[s]), i);
894 				//Propagate to lattice derivative:
895 				if(Exc_RRT)
896 				{	for(int s=0; s<nInCount; s++)
897 						for(int j=0; j<3; j++)
898 							Esigma_RRT(i,j) -= dot(E_Dni[s], DnAll[s][j]) * gInfo.dV;
899 				}
900 			}
901 			//Accumulate over processes:
902 			for(int s=0; s<nInCount; s++)
903 			{	watchComm.start();
904 				nullToZero(E_nTilde[s], gInfo);
905 				E_nTilde[s]->allReduceData(mpiWorld, MPIUtil::ReduceSum);
906 				watchComm.stop();
907 				E_n[s] += Jdag(E_nTilde[s]);
908 			}
909 			if(Exc_RRT)
910 			{	mpiWorld->allReduce(Esigma_RRT, MPIUtil::ReduceSum);
911 				*Exc_RRT += Esigma_RRT;
912 			}
913 		}
914 	}
915 
916 	if(Vxc) *Vxc = E_n;
917 	if(Vtau) *Vtau = E_tau;
918 	watch.stop();
919 	return Exc;
920 }
921 
922 //Unpolarized wrapper to above function:
operator ()(const ScalarField & n,ScalarField * Vxc,IncludeTXC includeTXC,const ScalarField * tau,ScalarField * Vtau,matrix3<> * Exc_RRT) const923 double ExCorr::operator()(const ScalarField& n, ScalarField* Vxc, IncludeTXC includeTXC,
924 		const ScalarField* tau, ScalarField* Vtau, matrix3<>* Exc_RRT) const
925 {	ScalarFieldArray VxcArr(1), tauArr(1), VtauArr(1);
926 	if(tau) tauArr[0] = *tau;
927 	double Exc =  (*this)(ScalarFieldArray(1, n), Vxc ? &VxcArr : 0, includeTXC,
928 		tau ? &tauArr :0, Vtau ? &VtauArr : 0, Exc_RRT);
929 	if(Vxc) *Vxc = VxcArr[0];
930 	if(Vtau) *Vtau = VtauArr[0];
931 	return Exc;
932 }
933 
setMask(size_t iStart,size_t iStop,const double * n,double * mask,double nCut)934 inline void setMask(size_t iStart, size_t iStop, const double* n, double* mask, double nCut)
935 {	for(size_t i=iStart; i<iStop; i++) mask[i] = (n[i]<nCut ? 0. : 1.);
936 }
937 
getSecondDerivatives(const ScalarField & n,ScalarField & e_nn,ScalarField & e_sigma,ScalarField & e_nsigma,ScalarField & e_sigmasigma,double nCut) const938 void ExCorr::getSecondDerivatives(const ScalarField& n, ScalarField& e_nn, ScalarField& e_sigma, ScalarField& e_nsigma, ScalarField& e_sigmasigma, double nCut) const
939 {
940 	//Check for GGAs and meta GGAs:
941 	bool needsSigma = false, needsTau=false, needsLap=false;
942 	for(auto func: functionals->internal)
943 		if(!func->hasKinetic())
944 		{	needsSigma |= func->needsSigma();
945 			needsLap |= func->needsLap();
946 			needsTau |= func->needsTau();
947 		}
948 	#ifdef LIBXC_ENABLED
949 	for(auto func: functionals->libXC)
950 		if(!func->hasKinetic())
951 		{	needsSigma |= func->needsSigma();
952 			needsLap |= func->needsLap();
953 			needsTau |= func->needsTau();
954 		}
955 	#endif
956 
957 	if(needsTau || needsLap) die("Second derivatives implemented only for LDAs and GGAs.\n");
958 
959 	const double eps = 1e-7; //Order sqrt(double-precision epsilon)
960 	const double scalePlus = 1.+eps;
961 	const double scaleMinus = 1.-eps;
962 	const ScalarField nPlus = scalePlus * n;
963 	const ScalarField nMinus = scaleMinus * n;
964 	const GridInfo& gInfo = n->gInfo;
965 
966 	//Compute mask to zero out low density regions
967 	ScalarField mask(ScalarFieldData::alloc(gInfo));
968 	threadLaunch(setMask, gInfo.nr, n->data(), mask->data(), nCut);
969 
970 	//Compute gradient-squared for GGA
971 	ScalarField sigma, sigmaPlus, sigmaMinus;
972 	if(needsSigma)
973 	{	sigma = lengthSquared(gradient(n));
974 		sigmaPlus = scalePlus * sigma;
975 		sigmaMinus = scaleMinus * sigma;
976 	}
977 
978 	//Configurations of n and sigma, and the gradients w.r.t them:
979 	struct Config
980 	{	const ScalarField *n, *sigma;
981 		ScalarField e_n, e_sigma;
982 	};
983 	std::vector<Config> configs(5);
984 	configs[0].n = &n;      configs[0].sigma = &sigma;      //original point
985 	configs[1].n = &nPlus;  configs[1].sigma = &sigma;      // + dn
986 	configs[2].n = &nMinus; configs[2].sigma = &sigma;      // - dn
987 	configs[3].n = &n;      configs[3].sigma = &sigmaPlus;  // + dsigma
988 	configs[4].n = &n;      configs[4].sigma = &sigmaMinus; // - dsigma
989 
990 	//Compute the gradients at all the configurations:
991 	ScalarField eTmp; nullToZero(eTmp, gInfo); //temporary energy return value (ignored)
992 	for(int i=0; i<(needsSigma ? 5 : 3); i++)
993 	{	Config& c = configs[i];
994 		std::vector<const double*> nData(1), sigmaData(1), lapData(1), tauData(1);
995 		std::vector<double*> e_nData(1), e_sigmaData(1), e_lapData(1), e_tauData(1);
996 		double* eData = eTmp->dataPref();
997 		nData[0] = (*c.n)->dataPref();
998 		nullToZero(c.e_n, gInfo);
999 		e_nData[0] = c.e_n->dataPref();
1000 		if(needsSigma)
1001 		{	sigmaData[0] = (*c.sigma)->dataPref();
1002 			nullToZero(c.e_sigma, gInfo);
1003 			e_sigmaData[0] = c.e_sigma->dataPref();
1004 		}
1005 		#ifdef LIBXC_ENABLED
1006 		//Compute LibXC functionals:
1007 		for(auto func: functionals->libXC)
1008 			if(!func->hasKinetic())
1009 				func->evaluate(1, gInfo.nr, nData[0], sigmaData[0], lapData[0], tauData[0],
1010 					eData, e_nData[0], e_sigmaData[0], e_lapData[0], e_tauData[0]);
1011 		#endif
1012 		//Compute internal functionals:
1013 		for(auto func: functionals->internal)
1014 			if(!func->hasKinetic())
1015 				func->evaluate(gInfo.nr, nData, sigmaData, lapData, tauData,
1016 					eData, e_nData, e_sigmaData, e_lapData, e_tauData);
1017 	}
1018 
1019 	//Compute finite difference derivatives:
1020 	ScalarField nDen = (0.5/eps) * inv(n) * mask;
1021 	e_nn = nDen * (configs[1].e_n - configs[2].e_n);
1022 	if(needsSigma)
1023 	{	ScalarField sigmaDen = (0.5/eps) * inv(sigma) * mask;
1024 		e_sigma = configs[0].e_sigma*mask; //First derivative available analytically
1025 		e_nsigma = 0.5*(nDen * (configs[1].e_sigma - configs[2].e_sigma) + sigmaDen * (configs[3].e_n - configs[4].e_n));
1026 		e_sigmasigma = sigmaDen * (configs[3].e_sigma - configs[4].e_sigma);
1027 	}
1028 	else
1029 	{	e_sigma = 0;
1030 		e_nsigma = 0;
1031 		e_sigmasigma = 0;
1032 	}
1033 }
1034