/*-------------------------------------------------------------------
Copyright 2012 Ravishankar Sundararaman

This file is part of JDFTx.

JDFTx is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

JDFTx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
-------------------------------------------------------------------*/

#include <electronic/ExCorr.h>
#include <electronic/Basis.h>
#include <electronic/Everything.h>
#include <electronic/ExactExchange.h>
#include <electronic/ExCorr_internal.h>
#include <electronic/ExCorr_internal_LDA.h>
#include <electronic/ExCorr_internal_GGA.h>
#include <electronic/ExCorr_internal_mGGA.h>
#include <electronic/ExCorr_OrbitalDep_GLLBsc.h>
#include <core/Thread.h>
#include <core/GpuUtil.h>
#include <core/VectorField.h>

//---------------- Subset wrapper for MPI parallelization --------------------

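//! Evaluate the functional on the grid-point range [iStart,iStop): all non-null
//! data pointers are offset by iStart and forwarded to evaluate(), so that each
//! MPI process can work on just its own slice of the real-space grid.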
void Functional::evaluateSub(int iStart, int iStop,
	std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{
	struct Offset
	{	int iStart;
		std::vector<const double*> operator()(std::vector<const double*> v) { auto vOut=v; for(auto& p: vOut) if(p) p+=iStart; return vOut; }
		std::vector<double*> operator()(std::vector<double*> v) { auto vOut=v; for(auto& p: vOut) if(p) p+=iStart; return vOut; }
	}
	offset;
	offset.iStart = iStart;
	evaluate(iStop-iStart, offset(n), offset(sigma), offset(lap), offset(tau), E ? E+iStart : 0, offset(E_n), offset(E_sigma), offset(E_lap), offset(E_tau));
}

//---------------- Spin-density-matrix transformations for noncollinear magnetism --------------------

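//! Convert quantities x given in the 4-component spin-density-matrix representation
//! (accompanying density n) to the local 2-component diagonal-spin basis (xDiag),
//! with spinDiagonalizeGrad providing the corresponding gradient back-propagation;
//! these are used below for noncollinear-magnetism calculations.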
void spinDiagonalize(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<double*> xDiag)
{	threadedLoop(spinDiagonalize_calc, N, n, x, xDiag);
}
void spinDiagonalizeGrad(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<const double*> E_xDiag, std::vector<double*> E_n, std::vector<double*> E_x)
{	threadedLoop(spinDiagonalizeGrad_calc, N, n, x, E_xDiag, E_n, E_x);
}
#ifdef GPU_ENABLED
void spinDiagonalize_gpu(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<double*> xDiag);
void spinDiagonalizeGrad_gpu(int N, std::vector<const double*> n, std::vector<const double*> x, std::vector<const double*> E_xDiag, std::vector<double*> E_n, std::vector<double*> E_x);
#endif

//---------------- LDA thread launcher / gpu switch --------------------

FunctionalLDA::FunctionalLDA(LDA_Variant variant, double scaleFac) : Functional(scaleFac), variant(variant)
{	switch(variant)
	{	case LDA_X_Slater: logPrintf("Initialized Slater LDA exchange.\n"); break;
		case LDA_C_PZ: logPrintf("Initialized Perdew-Zunger LDA correlation.\n"); break;
		case LDA_C_PW: logPrintf("Initialized Perdew-Wang LDA correlation.\n"); break;
		case LDA_C_PW_prec: logPrintf("Initialized Perdew-Wang LDA correlation (extended precision).\n"); break;
		case LDA_C_VWN: logPrintf("Initialized Vosko-Wilk-Nusair LDA correlation.\n"); break;
		case LDA_XC_Teter: logPrintf("Initialized Teter93 LSD exchange+correlation.\n"); break;
		case LDA_KE_TF: logPrintf("Initialized Thomas-Fermi LDA kinetic energy.\n"); break;
	}
}

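//The templated LDA() below evaluates LDA_calc<variant,nCount>::compute at each
//grid point via threadedLoop; the runtime overload dispatches to the appropriate
//template instantiation (functional variant and spin count, nCount = 1 for
//unpolarized and 2 for spin-polarized densities) using the SwitchTemplate macros.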
template<LDA_Variant variant, int nCount>
void LDA(int N, array<const double*,nCount> n, double* E, array<double*,nCount> E_n, double scaleFac)
{	threadedLoop(LDA_calc<variant,nCount>::compute, N, n, E, E_n, scaleFac);
}
void LDA(LDA_Variant variant, int N, std::vector<const double*> n, double* E, std::vector<double*> E_n, double scaleFac)
{	SwitchTemplate_spin(SwitchTemplate_LDA, variant, n.size(), LDA, (N, n, E, E_n, scaleFac) )
}
#ifdef GPU_ENABLED
void LDA_gpu(LDA_Variant variant, int N, std::vector<const double*> n, double* E, std::vector<double*> E_n, double scaleFac);
#endif

void FunctionalLDA::evaluate(int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{	//LDA: ignore sigma, lap and tau, and call the LDA launcher above (CPU/GPU as appropriate)
	assert(n.size()==1 || n.size()==2);
	callPref(LDA)(variant, N, n, E, E_n, scaleFac);
}

//---------------- GGA thread launcher / gpu switch --------------------

FunctionalGGA::FunctionalGGA(GGA_Variant variant, double scaleFac) : Functional(scaleFac), variant(variant)
{	switch(variant)
	{	case GGA_X_PBE: logPrintf("Initialized PBE GGA exchange.\n"); break;
		case GGA_C_PBE: logPrintf("Initialized PBE GGA correlation.\n"); break;
		case GGA_X_PBEsol: logPrintf("Initialized PBEsol GGA exchange.\n"); break;
		case GGA_C_PBEsol: logPrintf("Initialized PBEsol GGA correlation.\n"); break;
		case GGA_X_PW91: logPrintf("Initialized PW91 GGA exchange.\n"); break;
		case GGA_C_PW91: logPrintf("Initialized PW91 GGA correlation.\n"); break;
		case GGA_X_wPBE_SR: logPrintf("Initialized omega-PBE short-ranged GGA exchange.\n"); break;
		case GGA_X_GLLBsc: logPrintf("Initialized GLLB-sc GGA exchange potential.\n"); break;
		case GGA_X_LB94: logPrintf("Initialized LB94 GGA exchange potential correction.\n"); break;
		case GGA_KE_VW: logPrintf("Initialized von Weizsacker kinetic energy gradient correction.\n"); break;
		case GGA_KE_PW91: logPrintf("Initialized PW91 GGA kinetic energy.\n"); break;
	}
}

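//The templated GGA() below additionally takes the density-gradient contractions
//sigma, of which there are 2*nCount-1 components: |grad n|^2 for unpolarized
//calculations, and (grad n_up).(grad n_up), (grad n_up).(grad n_dn),
//(grad n_dn).(grad n_dn) for spin-polarized ones. Dispatch to the compile-time
//variant again happens through the SwitchTemplate macros.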
template<GGA_Variant variant, bool spinScaling, int nCount>
void GGA(int N, array<const double*,nCount> n, array<const double*,2*nCount-1> sigma,
	double* E, array<double*,nCount> E_n, array<double*,2*nCount-1> E_sigma, double scaleFac)
{	threadedLoop(GGA_calc<variant,spinScaling,nCount>::compute, N, n, sigma, E, E_n, E_sigma, scaleFac);
}
void GGA(GGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma, double scaleFac)
{	SwitchTemplate_spin(SwitchTemplate_GGA, variant, n.size(), GGA, (N, n, sigma, E, E_n, E_sigma, scaleFac) )
}
#ifdef GPU_ENABLED
void GGA_gpu(GGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma, double scaleFac);
#endif

void FunctionalGGA::evaluate(int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{	//GGA: ignore lap and tau, and call the GGA launcher above (CPU/GPU as appropriate)
	assert(n.size()==1 || n.size()==2);
	callPref(GGA)(variant, N, n, sigma, E, E_n, E_sigma, scaleFac);
}


//---------------- metaGGA thread launcher / gpu switch --------------------

FunctionalMGGA::FunctionalMGGA(mGGA_Variant variant, double scaleFac) : Functional(scaleFac), variant(variant)
{	switch(variant)
	{	case mGGA_X_TPSS: logPrintf("Initialized TPSS mGGA exchange.\n"); break;
		case mGGA_C_TPSS: logPrintf("Initialized TPSS mGGA correlation.\n"); break;
		case mGGA_X_revTPSS: logPrintf("Initialized revTPSS mGGA exchange.\n"); break;
		case mGGA_C_revTPSS: logPrintf("Initialized revTPSS mGGA correlation.\n"); break;
	}
}

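//The templated mGGA() below extends the GGA launcher with the density Laplacian
//(lap) and kinetic-energy density (tau) inputs, along with the corresponding
//gradient outputs E_lap and E_tau, again dispatched per variant and spin count.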
template<mGGA_Variant variant, bool spinScaling, int nCount>
void mGGA(int N, array<const double*,nCount> n, array<const double*,2*nCount-1> sigma,
	array<const double*,nCount> lap, array<const double*,nCount> tau,
	double* E, array<double*,nCount> E_n, array<double*,2*nCount-1> E_sigma,
	array<double*,nCount> E_lap, array<double*,nCount> E_tau, double scaleFac)
{	threadedLoop(mGGA_calc<variant,spinScaling,nCount>::compute, N,
		n, sigma, lap, tau, E, E_n, E_sigma, E_lap, E_tau, scaleFac);
}
void mGGA(mGGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau, double scaleFac)
{	SwitchTemplate_spin(SwitchTemplate_mGGA, variant, n.size(), mGGA, (N,
		n, sigma, lap, tau, E, E_n, E_sigma, E_lap, E_tau, scaleFac) )
}
#ifdef GPU_ENABLED
void mGGA_gpu(mGGA_Variant variant, int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau, double scaleFac);
#endif

void FunctionalMGGA::evaluate(int N, std::vector<const double*> n, std::vector<const double*> sigma,
	std::vector<const double*> lap, std::vector<const double*> tau,
	double* E, std::vector<double*> E_n, std::vector<double*> E_sigma,
	std::vector<double*> E_lap, std::vector<double*> E_tau) const
{	//mGGA: pass all inputs (including lap and tau) to the launcher above (CPU/GPU as appropriate)
	assert(n.size()==1 || n.size()==2);
	callPref(mGGA)(variant, N, n, sigma, lap, tau, E, E_n, E_sigma, E_lap, E_tau, scaleFac);
}



//---------------------------- LibXC wrapper -----------------------------
#ifdef LIBXC_ENABLED
#include <xc.h>

#if XC_MAJOR_VERSION >= 4
//Provide wrapper emulating libxc v3's interface to get XC reference:
char const* xc_func_info_get_ref(const xc_func_info_type *info, int number)
{	return xc_func_info_get_references(info, number)->ref;
}
#endif

//! LibXC wrapper that provides an interface similar to that of the internal functionals
//! with minor differences to handle the different data order and conventions used by LibXC
class FunctionalLibXC
{	xc_func_type funcUnpolarized, funcPolarized;
public:
	bool needsSigma() const { return funcUnpolarized.info->family != XC_FAMILY_LDA; } //!< gradients needed for GGA and HYB_GGA
	bool needsLap() const { return funcUnpolarized.info->family == XC_FAMILY_MGGA; } //!< MGGAs may need laplacian
	bool needsTau() const { return funcUnpolarized.info->family == XC_FAMILY_MGGA; } //!< MGGAs need KE density
	bool hasExchange() const { return funcUnpolarized.info->kind == XC_EXCHANGE || funcUnpolarized.info->kind == XC_EXCHANGE_CORRELATION; }
	bool hasCorrelation() const { return funcUnpolarized.info->kind == XC_CORRELATION || funcUnpolarized.info->kind == XC_EXCHANGE_CORRELATION; }
	bool hasKinetic() const { return funcUnpolarized.info->kind == XC_KINETIC; }
	bool hasEnergy() const { return true; }
	double exxScale() const { return funcUnpolarized.cam_omega>0 ? funcUnpolarized.cam_beta : funcUnpolarized.cam_alpha; }
	double exxOmega() const { return funcUnpolarized.cam_omega; }

	FunctionalLibXC(int xcCode, const char* typeName)
	{	if(xc_func_init(&funcUnpolarized, xcCode, XC_UNPOLARIZED) != 0)
			die("Error initializing LibXC unpolarized %s functional\n", typeName);
		if(xc_func_init(&funcPolarized, xcCode, XC_POLARIZED) != 0)
			die("Error initializing LibXC polarized %s functional\n", typeName);

		logPrintf("Initialized LibXC %s functional '%s'\n", typeName, funcUnpolarized.info->name);

		Citations::add("LibXC library of exchange-correlation functions",
			"M. A. L. Marques, M. J. T. Oliveira and T. Burnus, Comput. Phys. Commun. 183, 2272 (2012)");
		Citations::add(
			funcUnpolarized.info->name + string(" ") + typeName + string(" functional"),
			xc_func_info_get_ref(funcUnpolarized.info,0) );
	}
	~FunctionalLibXC()
	{	xc_func_end(&funcUnpolarized);
		xc_func_end(&funcPolarized);
	}

	//! Like Functional::evaluate, except different spin components are stored together
	//! and the computed energy is per-particle (e) instead of per volume (E).
	void evaluate(int nCount, int N,
		const double* n, const double* sigma, const double* lap, const double* tau,
		double* e, double* E_n, double* E_sigma, double* E_lap, double* E_tau) const
	{
		assert(nCount==1 || nCount==2);
		const xc_func_type& func = (nCount==1) ? funcUnpolarized : funcPolarized;
		int sigmaCount = 2*nCount-1; //1 for unpolarized, 3 for polarized
		int Nn = N * nCount;
		int Nsigma = N * sigmaCount;
		//Allocate temporaries:
		std::vector<double> eTemp(N);
		std::vector<double> E_nTemp(E_n ? Nn : 0);
		std::vector<double> E_sigmaTemp(E_n && needsSigma() ? Nsigma : 0);
		std::vector<double> E_lapTemp(E_n && needsLap() ? Nn : 0);
		std::vector<double> E_tauTemp(E_n && needsTau() ? Nn : 0);
		//Invoke appropriate LibXC function in scratch space:
		if(needsTau())
		{	//Project out problematic mGGA points (not handled correctly by LibXC 4):
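			//Two regularizations are applied point-by-point before calling LibXC:
			//(a) where tau is below tauCutoff, the density and tau at that point are zeroed out, and
			//(b) where the von Weizsacker bound is violated (sigma/8 > n*tau at non-negligible density),
			//    tau is reset to the von Weizsacker value sigma/(8n), rescaling spin components proportionally.
			//(tauCutoff and nCutoff are small cutoff constants defined elsewhere in the implementation.)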
			#if XC_MAJOR_VERSION >= 4
			for(int i=0; i<N; i++)
			{	double* nPtr = (double*)(n + i*nCount);
				double* tauPtr = (double*)(tau + i*nCount);
				double nTot = nCount==1 ? *nPtr : (*nPtr + *(nPtr+1));
				double tauTot = nCount==1 ? *tauPtr : (*tauPtr + *(tauPtr+1));
				double sigmaTot = nCount==1 ? sigma[i] : sigma[3*i]+2*sigma[3*i+1]+sigma[3*i+2];
				bool zOffRange = 0.125*sigmaTot > (nTot * tauTot);
				if(tauTot < tauCutoff) //Small tau
				{	if(nCount==1)
					{	*nPtr = 0.;
						*tauPtr = 0.;
					}
					else
					{	*nPtr = *(nPtr+1) = 0.;
						*tauPtr = *(tauPtr+1) = 0.;
					}
				}
				else if(zOffRange && nTot>nCutoff) //tauVW/tau ratio out of range
				{	double tauTotNew = 0.125*sigmaTot/nTot; //von Weizsacker value
					if(nCount==1)
						*tauPtr = tauTotNew;
					else
					{	double tauScale = tauTotNew/tauTot;
						*tauPtr *= tauScale;
						*(tauPtr+1) *= tauScale;
					}
				}
			}
			#endif
			if(E_n) xc_mgga_exc_vxc(&func, N, n, sigma, lap, tau,
				&eTemp[0], &E_nTemp[0], &E_sigmaTemp[0], &E_lapTemp[0], &E_tauTemp[0]);
			else xc_mgga_exc(&func, N, n, sigma, lap, tau, &eTemp[0]);
		}
		else if(needsSigma())
		{	if(E_n) xc_gga_exc_vxc(&func, N, n, sigma, &eTemp[0], &E_nTemp[0], &E_sigmaTemp[0]); //need gradient
			else xc_gga_exc(&func, N, n, sigma, &eTemp[0]);
		}
		else
		{	if(E_n) xc_lda_exc_vxc(&func, N, n, &eTemp[0], &E_nTemp[0]); //need gradient
			else xc_lda_exc(&func, N, n, &eTemp[0]);
		}
		//Accumulate onto final results
		eblas_daxpy(N, 1., &eTemp[0], 1, e, 1);
		if(E_nTemp.size()) eblas_daxpy(Nn, 1., &E_nTemp[0], 1, E_n, 1);
		if(E_sigmaTemp.size()) eblas_daxpy(Nsigma, 1., &E_sigmaTemp[0], 1, E_sigma, 1);
		if(E_lapTemp.size()) eblas_daxpy(Nn, 1., &E_lapTemp[0], 1, E_lap, 1);
		if(E_tauTemp.size()) eblas_daxpy(Nn, 1., &E_tauTemp[0], 1, E_tau, 1);
	}

	static void evaluate_thread(int iStart, int iStop, const FunctionalLibXC* func, int iOffset,
		int nCount, const double* n, const double* sigma, const double* lap, const double* tau,
		double* e, double* E_n, double* E_sigma, double* E_lap, double* E_tau)
	{
		int offs_e = (iOffset+iStart);
		int offs_n = offs_e * nCount;
		int offs_sigma = offs_e * (2*nCount-1);
		int N = iStop-iStart; if(!N) return;
		#define OFFSET(ptr,offset) ((ptr) ? ((ptr)+(offset)) : 0)
		func->evaluate(nCount, N, OFFSET(n,offs_n), OFFSET(sigma,offs_sigma), OFFSET(lap,offs_n), OFFSET(tau,offs_n),
			OFFSET(e,offs_e), OFFSET(E_n,offs_n), OFFSET(E_sigma,offs_sigma), OFFSET(E_lap,offs_n), OFFSET(E_tau,offs_n) );
		#undef OFFSET
	}

	void evaluateSub(int nCount, int iStart, int iStop,
		const double* n, const double* sigma, const double* lap, const double* tau,
		double* e, double* E_n, double* E_sigma, double* E_lap, double* E_tau) const
	{
		int N = iStop-iStart; if(!N) return;
		threadLaunch(FunctionalLibXC::evaluate_thread, N, this, iStart,
			nCount, n, sigma, lap, tau, e, E_n, E_sigma, E_lap, E_tau);
	}
};

//! Convert a collection of scalar fields into an interleaved vector field.
//! result can be freed using delete[]
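//! (For M=2 the output order is n[0]_up, n[0]_dn, n[1]_up, n[1]_dn, ... i.e. the
//! spin index runs fastest, which is the layout LibXC expects for its inputs.)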
template<unsigned M> double* transpose(const ScalarFieldArray& inVec)
{	assert(inVec.size()==M);
	const unsigned N = inVec[0]->nElem;
	const double* in[M]; for(unsigned m=0; m<M; m++) in[m] = inVec[m]->data();
	double *out = new double[M*N], *outPtr = out;
	for(unsigned n=0; n<N; n++)
		for(unsigned m=0; m<M; m++)
			*(outPtr++) = in[m][n];
	return out;
}

//! Convert an interleaved vector field to a collection of scalar fields
template<unsigned M> void transpose(double* in, ScalarFieldArray& outVec)
{	assert(outVec.size()==M);
	const unsigned N = outVec[0]->nElem;
	double* out[M]; for(unsigned m=0; m<M; m++) out[m] = outVec[m]->data();
	double *inPtr = in;
	for(unsigned n=0; n<N; n++)
		for(unsigned m=0; m<M; m++)
			out[m][n] = *(inPtr++);
}

#endif //LIBXC_ENABLED

//----------------------- Functional List --------------------

struct FunctionalList
{	std::vector<std::shared_ptr<Functional> > internal; //!< Functionals with an internal implementation
	void add(LDA_Variant variant, double scaleFac=1.0)
	{	internal.push_back(std::make_shared<FunctionalLDA>(variant, scaleFac));
	}
	void add(GGA_Variant variant, double scaleFac=1.0)
	{	internal.push_back(std::make_shared<FunctionalGGA>(variant, scaleFac));
	}
	void add(mGGA_Variant variant, double scaleFac=1.0)
	{	internal.push_back(std::make_shared<FunctionalMGGA>(variant, scaleFac));
	}

	#ifdef LIBXC_ENABLED
	std::vector<std::shared_ptr<FunctionalLibXC> > libXC; //!< Functionals which use LibXC for evaluation
	void add(int xcCode, const char* name)
	{	libXC.push_back(std::make_shared<FunctionalLibXC>(xcCode, name));
	}
	#endif
};


//-------------- ExCorr members ----------------

extern EnumStringMap<ExCorrType> exCorrTypeMap;

ExCorr::ExCorr(ExCorrType exCorrType, KineticType kineticType) : exCorrType(exCorrType), kineticType(kineticType), xcName(exCorrTypeMap.getString(exCorrType)),
	exxScale(0.), exxOmega(0.), exxScaleOverride(0.), exxOmegaOverride(0.),
	functionals(std::make_shared<FunctionalList>())
#ifdef LIBXC_ENABLED
	, xcExchange(0), xcCorr(0), xcExcorr(0), xcKinetic(0)
#endif
{
}

void ExCorr::setup(const Everything& everything)
{	e = &everything;

	string citeReason = xcName + " exchange-correlation functional";

	switch(exCorrType)
	{
		#ifdef LIBXC_ENABLED
		case ExCorrLibXC:
		{	if(xcExcorr)
			{	functionals->add(xcExcorr, "exchange-correlation");
				exxScale = functionals->libXC.back()->exxScale(); //set exact exchange factor
				exxOmega = functionals->libXC.back()->exxOmega(); //set exact exchange screening parameter
			}
			else
			{	functionals->add(xcExchange, "exchange");
				exxScale = functionals->libXC.back()->exxScale(); //set exact exchange factor
				exxOmega = functionals->libXC.back()->exxOmega(); //set exact exchange screening parameter
				functionals->add(xcCorr, "correlation");
			}
			if(exxScaleOverride) die("Exact-exchange scale factor cannot be overridden for LibXC functionals.\n")
			if(exxOmegaOverride) die("Exact-exchange screening parameter cannot be overridden for LibXC functionals.\n")
			break;
		}
		#endif //LIBXC_ENABLED

		case ExCorrLDA_PZ:
			functionals->add(LDA_X_Slater);
			functionals->add(LDA_C_PZ);
			Citations::add(citeReason, "J.P. Perdew and A. Zunger, Phys. Rev. B 23, 5048 (1981)");
			break;
		case ExCorrLDA_PW:
			functionals->add(LDA_X_Slater);
			functionals->add(LDA_C_PW);
			Citations::add(citeReason, "J.P. Perdew and Y. Wang, Phys. Rev. B 45, 13244 (1992)");
			break;
		case ExCorrLDA_PW_prec:
			functionals->add(LDA_X_Slater);
			functionals->add(LDA_C_PW_prec);
			Citations::add(citeReason, "J.P. Perdew and Y. Wang, Phys. Rev. B 45, 13244 (1992)");
			break;
		case ExCorrLDA_VWN:
			functionals->add(LDA_X_Slater);
			functionals->add(LDA_C_VWN);
			Citations::add(citeReason, "S.H. Vosko, L. Wilk and M. Nusair, Can. J. Phys. 58, 1200 (1980)");
			break;
		case ExCorrLDA_Teter:
			functionals->add(LDA_XC_Teter);
			Citations::add(citeReason, "S. Goedecker, M. Teter and J. Hutter, Phys. Rev. B 54, 1703 (1996)");
			break;
		case ExCorrGGA_PBE:
			functionals->add(GGA_X_PBE);
			functionals->add(GGA_C_PBE);
			Citations::add(citeReason, "J.P. Perdew, K. Burke and M. Ernzerhof, Phys. Rev. Lett. 77, 3865 (1996)");
			break;
		case ExCorrGGA_PBEsol:
			functionals->add(GGA_X_PBEsol);
			functionals->add(GGA_C_PBEsol);
			Citations::add(citeReason, "J.P. Perdew et al., Phys. Rev. Lett. 100, 136406 (2008)");
			break;
		case ExCorrGGA_PW91:
			functionals->add(GGA_X_PW91);
			functionals->add(GGA_C_PW91);
			Citations::add(citeReason, "J.P. Perdew et al., Phys. Rev. B 46, 6671 (1992)");
			break;
		case ExCorrMGGA_TPSS:
			functionals->add(mGGA_X_TPSS);
			functionals->add(mGGA_C_TPSS);
			Citations::add(citeReason, "J. Tao, J.P. Perdew, V.N. Staroverov and G. Scuseria, Phys. Rev. Lett. 91, 146401 (2003)");
			break;
		case ExCorrMGGA_revTPSS:
			functionals->add(mGGA_X_revTPSS);
			functionals->add(mGGA_C_revTPSS);
			Citations::add(citeReason, "J.P. Perdew et al., Phys. Rev. Lett. 103, 026403 (2009)");
			break;
		case ExCorrORB_GLLBsc:
			if(e->eInfo.spinType == SpinVector)
				die("GLLB-sc functional not implemented for noncollinear spin-polarized calculations.\n");
			functionals->add(GGA_X_GLLBsc);
			orbitalDep = std::make_shared<ExCorr_OrbitalDep_GLLBsc>(*e);
			functionals->add(GGA_C_PBEsol);
			Citations::add(citeReason, "M. Kuisma, J. Ojanen, J. Enkovaara and T. T. Rantala, Phys. Rev. B 82, 115106 (2010)");
			break;
		case ExCorrPOT_LB94:
			functionals->add(LDA_X_Slater);
			functionals->add(LDA_C_PZ);
			functionals->add(GGA_X_LB94);
			Citations::add(citeReason, "R. van Leeuwen and E. J. Baerends, Phys. Rev. A 49, 2421 (1994)");
			break;
		case ExCorrHYB_PBE0:
			exxScale = exxScaleOverride ? exxScaleOverride : 1./4;
			functionals->add(GGA_X_PBE, 1.-exxScale);
			functionals->add(GGA_C_PBE);
			Citations::add(citeReason, "M. Ernzerhof and G. E. Scuseria, J. Chem. Phys. 110, 5029 (1999)");
			break;
		case ExCorrHYB_HSE06:
			exxOmega = exxOmegaOverride ? exxOmegaOverride : 0.11;
			exxScale = exxScaleOverride ? exxScaleOverride : 1./4;
			functionals->add(GGA_X_wPBE_SR, -exxScale);
			functionals->add(GGA_X_PBE);
			functionals->add(GGA_C_PBE);
			Citations::add(citeReason, "A.V. Krukau, O.A. Vydrov, A.F. Izmaylov and G.E. Scuseria, J. Chem. Phys. 125, 224106 (2006)");
			break;
		case ExCorrHYB_HSE12:
			exxOmega = exxOmegaOverride ? exxOmegaOverride : 0.185;
			exxScale = exxScaleOverride ? exxScaleOverride : 0.313;
			functionals->add(GGA_X_wPBE_SR, -exxScale);
			functionals->add(GGA_X_PBE);
			functionals->add(GGA_C_PBE);
			Citations::add(citeReason, "J.E. Moussa, P.A. Schultz and J.R. Chelikowsky, J. Chem. Phys. 136, 204117 (2012)");
			break;
		case ExCorrHYB_HSE12s:
			exxOmega = exxOmegaOverride ? exxOmegaOverride : 0.408;
			exxScale = exxScaleOverride ? exxScaleOverride : 0.425;
			functionals->add(GGA_X_wPBE_SR, -exxScale);
			functionals->add(GGA_X_PBE);
			functionals->add(GGA_C_PBE);
			Citations::add(citeReason, "J.E. Moussa, P.A. Schultz and J.R. Chelikowsky, J. Chem. Phys. 136, 204117 (2012)");
			break;
		case ExCorrHF:
			exxScale = 1.;
			break;
	}

	if(exxScaleOverride && exxScale!=exxScaleOverride)
		die("Exact-exchange scale factor override is only allowed for hybrid functionals.\n")
	if(exxOmegaOverride && exxOmega!=exxOmegaOverride)
		die("Exact-exchange screening parameter override is only allowed for screened hybrid functionals.\n")

	if(exxScale)
	{	logPrintf("Will include %lg x ", exxScale);
		if(!exxOmega) logPrintf("exact exchange.\n");
		else logPrintf("screened exact exchange with range-parameter %lg\n", exxOmega);
	}

	switch(kineticType)
	{	case KineticNone:
			break;
		case KineticTF:
			functionals->add(LDA_KE_TF);
			Citations::add("Thomas-Fermi kinetic energy functional",
				"L.H. Thomas, Proc. Cambridge Phil. Soc. 23, 542 (1927)\n"
				"E. Fermi, Rend. Accad. Naz. Lincei 6, 602 (1927)");
			break;
		case KineticVW:
			functionals->add(LDA_KE_TF);
			functionals->add(GGA_KE_VW, 1.);
			Citations::add("Thomas-Fermi-von-Weizsacker kinetic energy functional",
				"C.F.v. Weizsacker, Z. Phys. 96, 431 (1935)");
			break;
		case KineticPW91:
			functionals->add(GGA_KE_PW91);
			Citations::add("PW91K kinetic energy functional",
				"A. Lembarki and H. Chermette, Phys. Rev. A 50, 5328 (1994)");
			break;
		#ifdef LIBXC_ENABLED
		case KineticLibXC:
			functionals->add(xcKinetic, "kinetic");
			break;
		#endif //LIBXC_ENABLED
	}
}


string ExCorr::getName() const
{	return xcName;
}

double ExCorr::exxFactor() const
{	return exxScale;
}

double ExCorr::exxRange() const
{	return exxOmega;
}

bool ExCorr::needsKEdensity() const
{	for(auto func: functionals->internal)
		if(func->needsTau())
			return true;
	#ifdef LIBXC_ENABLED
	for(auto func: functionals->libXC)
		if(func->needsTau())
			return true;
	#endif
	return false;
}

bool ExCorr::hasEnergy() const
{	for(auto func: functionals->internal)
		if(!func->hasEnergy())
			return false;
	#ifdef LIBXC_ENABLED
	for(auto func: functionals->libXC)
		if(!func->hasEnergy())
			return false;
	#endif
	return true;
}

//! Extract a std::vector of data pointers from a VectorField array, along a specific Cartesian direction
template<typename T> std::vector<typename T::DataType*> dataPref(std::vector<ScalarFieldMultiplet<T,3> >& x, int iDir)
{	std::vector<typename T::DataType*> xData(x.size());
	for(unsigned s=0; s<x.size(); s++)
		xData[s] = x[s][iDir] ? x[s][iDir]->dataPref() : 0;
	return xData;
}

//! Extract a std::vector of const data pointers from a VectorField array, along a specific Cartesian direction
template<typename T> std::vector<const typename T::DataType*> constDataPref(const std::vector<ScalarFieldMultiplet<T,3> >& x, int iDir)
{	std::vector<const typename T::DataType*> xData(x.size());
	for(unsigned s=0; s<x.size(); s++)
		xData[s] = x[s][iDir] ? x[s][iDir]->dataPref() : 0;
	return xData;
}

//Return whether a functional should be included.
//Die if the masks are incompatible: e.g. requesting X only from a combined XC functional
template<typename Func> bool shouldInclude(const std::shared_ptr<Func>& functional, const IncludeTXC& includeTXC)
{	bool T = functional->hasKinetic();
	bool X = functional->hasExchange();
	bool C = functional->hasCorrelation();
	bool hasNeeded = (includeTXC.T && T) || (includeTXC.X && X) || (includeTXC.C && C);
	bool hasUnneeded = (!includeTXC.T && T) || (!includeTXC.X && X) || (!includeTXC.C && C);
	if(hasNeeded && hasUnneeded)
	{	string combination, spacer;
		if(T) { combination += spacer + "kinetic"; spacer="-"; }
		if(X) { combination += spacer + "exchange"; spacer="-"; }
		if(C) { combination += spacer + "correlation"; spacer="-"; }
		die("ExCorr cannot evaluate only some parts of combined %s functional.\n", combination.c_str());
	}
	return hasNeeded;
}

double ExCorr::operator()(const ScalarFieldArray& n, ScalarFieldArray* Vxc, IncludeTXC includeTXC,
	const ScalarFieldArray* tauPtr, ScalarFieldArray* Vtau, matrix3<>* Exc_RRT) const
{
	static StopWatch watch("ExCorrTotal"), watchComm("ExCorrCommunication"), watchFunc("ExCorrFunctional");
	watch.start();

	const int nInCount = n.size(); assert(nInCount==1 || nInCount==2 || nInCount==4);
	const int nCount = std::min(nInCount, 2); //Number of spin-densities used in the parametrization of the functional
	const int sigmaCount = 2*nCount-1;
	const GridInfo& gInfo = n[0]->gInfo;

	//------- Prepare inputs, allocate outputs -------

	//Energy density per volume:
	ScalarField E; nullToZero(E, gInfo);

	//Gradient w.r.t spin densities:
	bool needGradients = (Vxc or Exc_RRT); //need gradient propagation (for Vxc/Vtau, or for Exc_RRT)
	if(Vxc) Vxc->clear();
	ScalarFieldArray E_n(nCount);
	if(needGradients) nullToZero(E_n, gInfo);

	//Check for GGAs and meta GGAs:
	bool needsSigma = false, needsTau=false, needsLap=false;
	for(auto func: functionals->internal)
		if(shouldInclude(func, includeTXC))
		{	needsSigma |= func->needsSigma();
			needsLap |= func->needsLap();
			needsTau |= func->needsTau();
		}
	#ifdef LIBXC_ENABLED
	for(auto func: functionals->libXC)
		if(shouldInclude(func, includeTXC))
		{	needsSigma |= func->needsSigma();
			needsLap |= func->needsLap();
			needsTau |= func->needsTau();
		}
	#endif

	//Calculate spatial gradients for GGA (if needed)
	std::vector<VectorField> Dn(nInCount);
	int iDirStart, iDirStop;
	TaskDivision(3, mpiWorld).myRange(iDirStart, iDirStop);
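	//(The three Cartesian derivative directions are divided among MPI processes here;
	//the resulting per-direction contributions to sigma and to the gradients are
	//summed across processes in the allReduce calls further below.)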
	if(needsSigma)
	{	//Compute the gradients of the (spin-)densities:
		for(int s=0; s<nInCount; s++)
		{	const ScalarFieldTilde Jn = J(n[s]);
			for(int i=iDirStart; i<iDirStop; i++)
				Dn[s][i] = I(D(Jn,i));
		}
	}

	//Additional inputs/outputs for MGGAs (Laplacian, orbital KE and gradients w.r.t those)
	ScalarFieldArray lap(nInCount), E_lap(nCount);
	if(needsLap)
	{	//Compute laplacian
		for(int s=0; s<nInCount; s++)
			lap[s] = (1./gInfo.detR) * I(L(J(n[s])));
		//Allocate gradient w.r.t laplacian if required
		if(needGradients) nullToZero(E_lap, gInfo);
	}
	ScalarFieldArray tau(nInCount), E_tau(nCount);
	if(needsTau)
	{	//make sure orbital KE density has been provided
		assert(tauPtr);
		tau = *tauPtr;
		//allocate gradients w.r.t KE density if required
		if(Vxc)
		{	assert(Vtau); //if computing gradients, all gradients must be computed
			Vtau->clear();
		}
		if(needGradients) nullToZero(E_tau, gInfo);
	}

	//Transform to local spin-diagonal basis (noncollinear magnetism mode only)
	ScalarFieldArray nCapped(nCount), lapIn(nCount), tauIn(nCount);
	std::vector<VectorField> DnIn(nCount);
	if(nCount != nInCount)
	{	assert(nCount==2); assert(nInCount==4);
		std::swap(DnIn, Dn);
		std::swap(lapIn, lap);
		std::swap(tauIn, tau);
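		//(After the swaps, the original 4-component spin-density-matrix fields live in
		//the *In arrays, while Dn, lap and tau are rebuilt below as their 2-component
		//projections onto the local diagonal-spin basis defined by n.)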
		//Density:
		nullToZero(nCapped, gInfo);
		callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(n), dataPref(nCapped));
		//Spatial gradients:
		if(needsSigma)
		{	for(int i=iDirStart; i<iDirStop; i++)
			{	for(int s=0; s<nCount; s++) nullToZero(Dn[s][i], gInfo);
				callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(DnIn,i), dataPref(Dn,i));
			}
		}
		//Laplacian:
		if(needsLap)
		{	nullToZero(lap, gInfo);
			callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(lapIn), dataPref(lap));
		}
		//KE density:
		if(needsTau)
		{	nullToZero(tau, gInfo);
			callPref(spinDiagonalize)(gInfo.nr, constDataPref(n), constDataPref(tauIn), dataPref(tau));
		}
	}

	//Cap negative densities to 0:
	if(!nCapped[0]) nCapped = clone(n);
	double nMin, nMax;
	for(int s=0; s<nCount; s++)
		callPref(eblas_capMinMax)(gInfo.nr, nCapped[s]->dataPref(), nMin, nMax, 0.);

	//Compute the required contractions for GGA:
	ScalarFieldArray sigma(sigmaCount), E_sigma(sigmaCount);
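	//(sigma[s1+s2] holds the contraction Dn[s1].Dn[s2]; for spin-polarized densities the
	//indices 0, 1, 2 thus correspond to up-up, up-down and down-down respectively.)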
	if(needsSigma)
	{	for(int s1=0; s1<nCount; s1++)
			for(int s2=s1; s2<nCount; s2++)
			{	for(int i=iDirStart; i<iDirStop; i++)
					sigma[s1+s2] += Dn[s1][i] * Dn[s2][i];
				watchComm.start();
				nullToZero(sigma[s1+s2], gInfo);
				sigma[s1+s2]->allReduceData(mpiWorld, MPIUtil::ReduceSum);
				watchComm.stop();
			}
		//Allocate gradient if required:
		if(needGradients) nullToZero(E_sigma, gInfo, sigmaCount);
	}

	#ifdef LIBXC_ENABLED
	//------------------ Evaluate LibXC functionals ---------------
	if(functionals->libXC.size())
	{	//Prepare input/output data on the CPU in transposed order (spins contiguous)
		double *eData = E->data(), *nData=0, *sigmaData=0, *lapData=0, *tauData=0;
		double *E_nData=0, *E_sigmaData=0, *E_lapData=0, *E_tauData=0;
		if(nCount == 1)
		{	nData = nCapped[0]->data();
			if(needsSigma) sigmaData = sigma[0]->data();
			if(needsLap) lapData = lap[0]->data();
			if(needsTau) tauData = tau[0]->data();
			if(needGradients)
			{	E_nData = E_n[0]->data();
				if(needsSigma) E_sigmaData = E_sigma[0]->data();
				if(needsLap) E_lapData = E_lap[0]->data();
				if(needsTau) E_tauData = E_tau[0]->data();
			}
		}
		else //Need to interleave input spin-vector fields:
		{	nData = transpose<2>(nCapped);
			if(needsSigma) sigmaData = transpose<3>(sigma);
			if(needsLap) lapData = transpose<2>(lap);
			if(needsTau) tauData = transpose<2>(tau);
			if(needGradients)
			{	E_nData = new double[2*gInfo.nr]; eblas_zero(2*gInfo.nr, E_nData);
				if(needsSigma) { E_sigmaData = new double[3*gInfo.nr]; eblas_zero(3*gInfo.nr, E_sigmaData); }
				if(needsLap) { E_lapData = new double[2*gInfo.nr]; eblas_zero(2*gInfo.nr, E_lapData); }
				if(needsTau) { E_tauData = new double[2*gInfo.nr]; eblas_zero(2*gInfo.nr, E_tauData); }
			}
		}

		//Calculate all the required functionals:
		watchFunc.start();
		for(auto func: functionals->libXC)
			if(shouldInclude(func, includeTXC))
				func->evaluateSub(nCount, gInfo.irStart, gInfo.irStop, nData, sigmaData, lapData, tauData,
					eData, E_nData, E_sigmaData, E_lapData, E_tauData);
		watchFunc.stop();

		//Uninterleave spin-vector field results:
		if(nCount != 1)
		{	delete[] nData;
			if(needsSigma) delete[] sigmaData;
			if(needsLap) delete[] lapData;
			if(needsTau) delete[] tauData;
			if(needGradients)
			{	transpose<2>(E_nData, E_n); delete[] E_nData;
				if(needsSigma) { transpose<3>(E_sigmaData, E_sigma); delete[] E_sigmaData; }
				if(needsLap) { transpose<2>(E_lapData, E_lap); delete[] E_lapData; }
				if(needsTau) { transpose<2>(E_tauData, E_tau); delete[] E_tauData; }
			}
		}

		//Convert per-particle energy to energy density per volume
		E = E * (nCount==1 ? nCapped[0] : nCapped[0]+nCapped[1]);
	}
	#endif //LIBXC_ENABLED

	//---------------- Compute internal functionals ----------------
	watchFunc.start();
	for(auto func: functionals->internal)
		if(shouldInclude(func, includeTXC))
			func->evaluateSub(gInfo.irStart, gInfo.irStop,
				constDataPref(nCapped), constDataPref(sigma), constDataPref(lap), constDataPref(tau),
				E->dataPref(), dataPref(E_n), dataPref(E_sigma), dataPref(E_lap), dataPref(E_tau));
	watchFunc.stop();

	//Cleanup unneeded derived quantities (free memory before starting communications and gradient propagation)
	double Exc = integral(E); E = 0; //note Exc accumulated over processes below in communication block
	nCapped.clear();
	sigma.clear();
	lap.clear();
	tau.clear();

	//---------------- Collect results over processes ----------------
	watchComm.start();
	mpiWorld->allReduce(Exc, MPIUtil::ReduceSum);
	for(ScalarField& x: E_n) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
	for(ScalarField& x: E_sigma) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
	for(ScalarField& x: E_lap) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
	for(ScalarField& x: E_tau) if(x) x->allReduceData(mpiWorld, MPIUtil::ReduceSum);
	watchComm.stop();

	//--------------- Gradient propagation ---------------------
	if(needGradients)
	{	//Change gradients from diagonal to spin-density-matrix if necessary
		if(nCount != nInCount)
		{	//Density:
			{	ScalarFieldArray E_nIn(nInCount); nullToZero(E_nIn, gInfo);
				callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(n), constDataPref(E_n), dataPref(E_nIn), dataPref(E_nIn));
				std::swap(E_nIn, E_n);
			}
			//KE density:
			if(needsTau)
			{	ScalarFieldArray E_tauIn(nInCount); nullToZero(E_tauIn, gInfo);
				callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(tauIn), constDataPref(E_tau), dataPref(E_n), dataPref(E_tauIn));
				std::swap(E_tauIn, E_tau);
			}
			else E_tau.resize(nInCount);
			//Laplacian:
			if(needsLap)
			{	ScalarFieldArray E_lapIn(nInCount); nullToZero(E_lapIn, gInfo);
				callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(lapIn), constDataPref(E_lap), dataPref(E_n), dataPref(E_lapIn));
				std::swap(E_lapIn, E_lap);
				lapIn.clear();
			}
		}

		//Propagate Laplacian contribution to density
		if(needsLap)
		{	for(int s=0; s<nInCount; s++)
			{	E_n[s] += Jdag((1./gInfo.detR) * L(Idag(E_lap[s])));
				if(Exc_RRT) *Exc_RRT += Lstress(J(n[s]), J(E_lap[s]));
			}
			E_lap.clear();
		}

		//Propagate spatial gradient contribution to density
		if(needsSigma)
		{	ScalarFieldTildeArray E_nTilde(nInCount); //contribution to the potential in Fourier space
			matrix3<> Esigma_RRT; //stress contribution through gradients
			std::vector<VectorField> DnAll(nInCount); //gradients of all density components required for stress calculation
			if(Exc_RRT)
			{	for(int s=0; s<nInCount; s++)
				{	const ScalarFieldTilde Jn = J(n[s]);
					for(int i=0; i<3; i++)
						DnAll[s][i] = (i>=iDirStart and i<iDirStop and nInCount==nCount) ? Dn[s][i] : I(D(Jn,i));
				}
			}
			for(int i=iDirStart; i<iDirStop; i++)
			{	//Propagate from contraction sigma to the spatial derivatives
				ScalarFieldArray E_Dni(nCount);
				for(int s1=0; s1<nCount; s1++)
					for(int s2=s1; s2<nCount; s2++)
					{	if(s1==s2) E_Dni[s1] += 2*(E_sigma[s1+s2] * Dn[s1][i]);
						else
						{	E_Dni[s1] += E_sigma[s1+s2] * Dn[s2][i];
							E_Dni[s2] += E_sigma[s1+s2] * Dn[s1][i];
						}
					}
				//Convert from diagonal to spin-density-matrix if necessary
				if(nCount != nInCount)
				{	ScalarFieldArray E_DniIn(nInCount); nullToZero(E_DniIn, gInfo);
					callPref(spinDiagonalizeGrad)(gInfo.nr, constDataPref(n), constDataPref(DnIn,i), constDataPref(E_Dni), dataPref(E_n), dataPref(E_DniIn));
					std::swap(E_DniIn, E_Dni);
				}
				//Propagate to E_nTilde:
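				//(Accumulating -d/dx_i of E_Dni over the directions i yields the usual GGA
				//divergence term -div(dE/d(grad n)) of the potential, assembled in reciprocal space.)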
				for(int s=0; s<nInCount; s++)
					E_nTilde[s] -= D(Idag(E_Dni[s]), i);
				//Propagate to lattice derivative:
				if(Exc_RRT)
				{	for(int s=0; s<nInCount; s++)
						for(int j=0; j<3; j++)
							Esigma_RRT(i,j) -= dot(E_Dni[s], DnAll[s][j]) * gInfo.dV;
				}
			}
			//Accumulate over processes:
			for(int s=0; s<nInCount; s++)
			{	watchComm.start();
				nullToZero(E_nTilde[s], gInfo);
				E_nTilde[s]->allReduceData(mpiWorld, MPIUtil::ReduceSum);
				watchComm.stop();
				E_n[s] += Jdag(E_nTilde[s]);
			}
			if(Exc_RRT)
			{	mpiWorld->allReduce(Esigma_RRT, MPIUtil::ReduceSum);
				*Exc_RRT += Esigma_RRT;
			}
		}
	}

	if(Vxc) *Vxc = E_n;
	if(Vtau) *Vtau = E_tau;
	watch.stop();
	return Exc;
}

//Unpolarized wrapper to above function:
double ExCorr::operator()(const ScalarField& n, ScalarField* Vxc, IncludeTXC includeTXC,
	const ScalarField* tau, ScalarField* Vtau, matrix3<>* Exc_RRT) const
{	ScalarFieldArray VxcArr(1), tauArr(1), VtauArr(1);
	if(tau) tauArr[0] = *tau;
	double Exc = (*this)(ScalarFieldArray(1, n), Vxc ? &VxcArr : 0, includeTXC,
		tau ? &tauArr : 0, Vtau ? &VtauArr : 0, Exc_RRT);
	if(Vxc) *Vxc = VxcArr[0];
	if(Vtau) *Vtau = VtauArr[0];
	return Exc;
}
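//A minimal usage sketch (hypothetical; assumes the trailing parameters of this
//operator take the default values declared with it in ExCorr.h):
//	ScalarField Vxc;               //XC potential (output)
//	double Exc = exCorr(n, &Vxc);  //total XC energy of an unpolarized density n
//where exCorr is an ExCorr instance on which setup() has already been called.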

inline void setMask(size_t iStart, size_t iStop, const double* n, double* mask, double nCut)
{	for(size_t i=iStart; i<iStop; i++) mask[i] = (n[i]<nCut ? 0. : 1.);
}

void ExCorr::getSecondDerivatives(const ScalarField& n, ScalarField& e_nn, ScalarField& e_sigma, ScalarField& e_nsigma, ScalarField& e_sigmasigma, double nCut) const
{
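	//Second derivatives of the XC energy density are obtained by central finite
	//differences of the analytic first derivatives: the density (and, for GGAs, sigma)
	//is scaled by (1 +/- eps), so that e.g. e_nn ~ [E_n(n(1+eps)) - E_n(n(1-eps))] / (2*eps*n),
	//with a mask zeroing out regions where n < nCut to avoid dividing by tiny densities.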
	//Check for GGAs and meta GGAs:
	bool needsSigma = false, needsTau=false, needsLap=false;
	for(auto func: functionals->internal)
		if(!func->hasKinetic())
		{	needsSigma |= func->needsSigma();
			needsLap |= func->needsLap();
			needsTau |= func->needsTau();
		}
	#ifdef LIBXC_ENABLED
	for(auto func: functionals->libXC)
		if(!func->hasKinetic())
		{	needsSigma |= func->needsSigma();
			needsLap |= func->needsLap();
			needsTau |= func->needsTau();
		}
	#endif

	if(needsTau || needsLap) die("Second derivatives implemented only for LDAs and GGAs.\n");

	const double eps = 1e-7; //Order sqrt(double-precision epsilon)
	const double scalePlus = 1.+eps;
	const double scaleMinus = 1.-eps;
	const ScalarField nPlus = scalePlus * n;
	const ScalarField nMinus = scaleMinus * n;
	const GridInfo& gInfo = n->gInfo;

	//Compute mask to zero out low density regions
	ScalarField mask(ScalarFieldData::alloc(gInfo));
	threadLaunch(setMask, gInfo.nr, n->data(), mask->data(), nCut);

	//Compute gradient-squared for GGA
	ScalarField sigma, sigmaPlus, sigmaMinus;
	if(needsSigma)
	{	sigma = lengthSquared(gradient(n));
		sigmaPlus = scalePlus * sigma;
		sigmaMinus = scaleMinus * sigma;
	}

	//Configurations of n and sigma, and the gradients w.r.t them:
	struct Config
	{	const ScalarField *n, *sigma;
		ScalarField e_n, e_sigma;
	};
	std::vector<Config> configs(5);
	configs[0].n = &n;      configs[0].sigma = &sigma;      //original point
	configs[1].n = &nPlus;  configs[1].sigma = &sigma;      // + dn
	configs[2].n = &nMinus; configs[2].sigma = &sigma;      // - dn
	configs[3].n = &n;      configs[3].sigma = &sigmaPlus;  // + dsigma
	configs[4].n = &n;      configs[4].sigma = &sigmaMinus; // - dsigma

	//Compute the gradients at all the configurations:
	ScalarField eTmp; nullToZero(eTmp, gInfo); //temporary energy return value (ignored)
	for(int i=0; i<(needsSigma ? 5 : 3); i++)
	{	Config& c = configs[i];
		std::vector<const double*> nData(1), sigmaData(1), lapData(1), tauData(1);
		std::vector<double*> e_nData(1), e_sigmaData(1), e_lapData(1), e_tauData(1);
		double* eData = eTmp->dataPref();
		nData[0] = (*c.n)->dataPref();
		nullToZero(c.e_n, gInfo);
		e_nData[0] = c.e_n->dataPref();
		if(needsSigma)
		{	sigmaData[0] = (*c.sigma)->dataPref();
			nullToZero(c.e_sigma, gInfo);
			e_sigmaData[0] = c.e_sigma->dataPref();
		}
		#ifdef LIBXC_ENABLED
		//Compute LibXC functionals:
		for(auto func: functionals->libXC)
			if(!func->hasKinetic())
				func->evaluate(1, gInfo.nr, nData[0], sigmaData[0], lapData[0], tauData[0],
					eData, e_nData[0], e_sigmaData[0], e_lapData[0], e_tauData[0]);
		#endif
		//Compute internal functionals:
		for(auto func: functionals->internal)
			if(!func->hasKinetic())
				func->evaluate(gInfo.nr, nData, sigmaData, lapData, tauData,
					eData, e_nData, e_sigmaData, e_lapData, e_tauData);
	}

	//Compute finite difference derivatives:
	ScalarField nDen = (0.5/eps) * inv(n) * mask;
	e_nn = nDen * (configs[1].e_n - configs[2].e_n);
	if(needsSigma)
	{	ScalarField sigmaDen = (0.5/eps) * inv(sigma) * mask;
		e_sigma = configs[0].e_sigma*mask; //First derivative available analytically
		e_nsigma = 0.5*(nDen * (configs[1].e_sigma - configs[2].e_sigma) + sigmaDen * (configs[3].e_n - configs[4].e_n));
		e_sigmasigma = sigmaDen * (configs[3].e_sigma - configs[4].e_sigma);
	}
	else
	{	e_sigma = 0;
		e_nsigma = 0;
		e_sigmasigma = 0;
	}
}