1 /*-------------------------------------------------------------------
2 Copyright 2012 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #include <core/Coulomb_ExchangeEval.h>
21 #include <core/CoulombIsolated.h>
22 #include <core/CoulombWire.h>
23 #include <core/CoulombKernel.h>
24 #include <core/Coulomb_internal.h>
25 #include <core/LatticeUtils.h>
26 #include <core/LoopMacros.h>
27 #include <core/BlasExtra.h>
28 #include <core/Util.h>
29 #include <gsl/gsl_sf.h>
30 
31 //-------------- Auxiliary Function method --------------------------
32 // Based on [ P. Carrier, S. Rohra and A. Gorling, Phys. Rev. B 75, 205126 (2007) ]
33 // for 3D, and generalized to other geometries by R. Sundararaman et al, PRB 87, 165122 (2013)
34 
35 // Reciprocal space function periodic on Brillouin zone which ~ G^2 near G=0
36 // and is strictly > 0 everywhere else in the zone for any crystal system
37 // g is a dimensionless coordinate (coefficients to reciprocal lattice vectors)
38 // GGT is the reciprocal space metric for this dimensionless coordinate
effGsq(const vector3<> & g,const matrix3<> & GGT)39 inline double effGsq(const vector3<>& g, const matrix3<>& GGT)
40 {	vector3<> sinPi; for(int i=0; i<3; i++) sinPi[i] = sin(M_PI*g[i]);
41 	vector3<> sin2Pi; for(int i=0; i<3; i++) sin2Pi[i] = sin(2*M_PI*g[i]);
42 	return (1./(M_PI*M_PI)) *
43 		( sinPi[0]*sinPi[0]*GGT(0,0) + sinPi[1]*sinPi[1]*GGT(1,1) + sinPi[2]*sinPi[2]*GGT(2,2)
44 		+ 0.5*(sin2Pi[0]*sin2Pi[1]*GGT(0,1) + sin2Pi[1]*sin2Pi[2]*GGT(1,2) + sin2Pi[2]*sin2Pi[0]*GGT(2,0)));
45 }
46 
47 //Singular function for 3D periodic systems (from Carrier et al., but generalized
48 //to include screened exchange, which although not singular, benefits from regularization
49 //when the effective supercell is smaller than the length scale of screening)
fSingular3D(const vector3<> & g,const matrix3<> & GGT,double omegaSq)50 inline double fSingular3D(const vector3<>& g, const matrix3<>& GGT, double omegaSq)
51 {	double Gsq = effGsq(g, GGT);
52 	if(omegaSq) return Gsq ? (1.-exp(-0.25*Gsq/omegaSq))*(4*M_PI)/Gsq : M_PI/omegaSq;
53 	else return Gsq ? (4*M_PI)/Gsq : 0.;
54 }
55 
56 //Singular function for 2D periodic systems (slab geometry)
57 //Assumes g=0 along the truncated direction
fSingular2D(const vector3<> & g,const matrix3<> & GGT,double omegaSq)58 inline double fSingular2D(const vector3<>& g, const matrix3<>& GGT, double omegaSq)
59 {	double G = sqrt(effGsq(g, GGT));
60 	if(omegaSq)
61 	{	double hlfOmegaInv=0.5/sqrt(omegaSq);
62 		return (2*M_PI) * hlfOmegaInv * erf_by_x(hlfOmegaInv*G);
63 	}
64 	else return G ? 2*M_PI/G : 0.;
65 }
66 
67 //Singular function for 1D periodic systems (wire geometry)
68 //Assumes g=0 along the truncated directions
fSingular1D(const vector3<> & g,const matrix3<> & GGT,double omegaSq)69 inline double fSingular1D(const vector3<>& g, const matrix3<>& GGT, double omegaSq)
70 {	double Gsq = effGsq(g, GGT);
71 	if(omegaSq) return Gsq ? gsl_sf_expint_Ei(-0.25*Gsq/omegaSq)-log(Gsq) : M_EULER-log(4.*omegaSq);
72 	else return Gsq ? log(4)-2*M_EULER-log(Gsq) : 0.;
73 }
74 
75 //Integrate fSingular on a box of size scale and center gCenter (in reciprocal lattice coordinates)]
fSingularIntegralBox(FSingular fSingular,vector3<bool> isTruncated,double scale,const vector3<> gCenter,const matrix3<> & GGT,double omegaSq)76 template<typename FSingular> double fSingularIntegralBox(FSingular fSingular, vector3<bool> isTruncated,
77 	double scale, const vector3<> gCenter, const matrix3<>& GGT, double omegaSq)
78 {	//Weights and abscissae of the 15-point gauss quadrature:
79 	const int N = 7;
80 	static const double w[N+1] =
81 	{	0.030753241996117268354628393577204, 0.070366047488108124709267416450667,
82 		0.107159220467171935011869546685869, 0.139570677926154314447804794511028,
83 		0.166269205816993933553200860481209, 0.186161000015562211026800561866423,
84 		0.198431485327111576456118326443839, 0.202578241925561272880620199967519
85 	};
86 	static const double x[N+1] =
87 	{	0.987992518020485428489565718586613, 0.937273392400705904307758947710209,
88 		0.848206583410427216200648320774217, 0.724417731360170047416186054613938,
89 		0.570972172608538847537226737253911, 0.394151347077563369897207370981045,
90 		0.201194093997434522300628303394596, 0.000000000000000000000000000000000
91 	};
92 	int N0 = isTruncated[0] ? 0 : N;
93 	int N1 = isTruncated[1] ? 0 : N;
94 	int N2 = isTruncated[2] ? 0 : N;
95 	double ret = 0.0;
96 	double h = 0.5 * scale;
97 	vector3<> g;
98 	for(int i0=-N0; i0<=N0; i0++)
99 	{	g[0] = gCenter[0] + h*x[N-abs(i0)]*(i0>0?1:-1);
100 		double w0 = (N0 ? h*w[N-abs(i0)] : 1.);
101 		for(int i1=-N1; i1<=N1; i1++)
102 		{	g[1] = gCenter[1] + h*x[N-abs(i1)]*(i1>0?1:-1);
103 			double w01 = w0 * (N1 ? h*w[N-abs(i1)] : 1.);
104 			for(int i2=-N2; i2<=N2; i2++)
105 			{	g[2] = gCenter[2] + h*x[N-abs(i2)]*(i2>0?1:-1);
106 				double w012 = w01 * (N2 ? h*w[N-abs(i2)] : 1.);
107 				ret += w012 * fSingular(g, GGT, omegaSq);
108 			}
109 		}
110 	}
111 	return ret;
112 }
113 
114 //Integrate fSingular between the box of size scale and scale/3 centered at the origin (in reciprocal lattice coordinates)
fSingularIntegralBoxDiff(FSingular fSingular,vector3<bool> isTruncated,double scale,const matrix3<> & GGT,double omegaSq)115 template<typename FSingular> double fSingularIntegralBoxDiff(FSingular fSingular, vector3<bool> isTruncated,
116 	double scale, const matrix3<>& GGT, double omegaSq)
117 {	double scaleBy3 = scale/3.;
118 	int N0 = isTruncated[0] ? 0 : 1;
119 	int N1 = isTruncated[1] ? 0 : 1;
120 	int N2 = isTruncated[2] ? 0 : 1;
121 	double ret = 0.0;
122 	vector3<int> ig;
123 	for(ig[0]=-N0; ig[0]<=N0; ig[0]++)
124 	for(ig[1]=-N1; ig[1]<=N1; ig[1]++)
125 	for(ig[2]=-N2; ig[2]<=N2; ig[2]++)
126 		if(ig.length_squared()) //except the center box
127 			ret += fSingularIntegralBox<>(fSingular, isTruncated, scaleBy3, scaleBy3*ig, GGT, omegaSq);
128 	return ret;
129 }
130 
131 //Compute the difference between the integral of fSingular (scaled appropriately) and its sum over kmesh
fSingularIntegralMinusSum(FSingular fSingular,vector3<bool> isTruncated,const matrix3<> & GGT,double omegaSq,const std::vector<vector3<>> & kmesh)132 template<typename FSingular> double fSingularIntegralMinusSum(FSingular fSingular, vector3<bool> isTruncated,
133 	const matrix3<>& GGT, double omegaSq, const std::vector<vector3<>>& kmesh)
134 {	//Compute the integral of fSingular over the brillouin zone
135 	double fSingularIntegral = 0.;
136 	for(double scale=1.0; scale>1e-16; scale/=3.)
137 		fSingularIntegral += fSingularIntegralBoxDiff(fSingular, isTruncated, scale, GGT, omegaSq);
138 	//Compute the sum over the k-point mesh:
139 	double fSingularSum = 0.;
140 	for(unsigned i=0; i<kmesh.size(); i++)
141 		fSingularSum += fSingular(kmesh[i]-kmesh[0], GGT, omegaSq);
142 	//Return difference:
143 	return fSingularIntegral * kmesh.size() - fSingularSum;
144 }
145 
146 //Return the Vzero correction for specified lattice vectors, kmesh, truncation mode etc.
getAuxiliaryFunctionVzero(matrix3<> R,vector3<bool> isTruncated,int iDir,const std::vector<vector3<>> & kmesh,double omegaSq)147 inline double getAuxiliaryFunctionVzero(matrix3<> R, vector3<bool> isTruncated, int iDir, const std::vector<vector3<>>& kmesh, double omegaSq)
148 {	//Compute lattice vector-dependent quantities (recomputed here for lattice derivative):
149 	double detR = fabs(det(R));
150 	matrix3<> G = 2*M_PI*inv(R);
151 	matrix3<> GGT = G*(~G);
152 	//Switch based on dimension:
153 	int nPeriodic=0;
154 	for(int k=0; k<3; k++)
155 		if(!isTruncated[k])
156 			nPeriodic++;
157 	switch(nPeriodic)
158 	{	case 0:
159 			assert(!"Auxiliary function method meaningless for isolated geometry.\n");
160 			return 0.;
161 		case 1:
162 			return (detR/R.column(iDir).length()) //transverse area to untruncated axis
163 				* fSingularIntegralMinusSum(fSingular1D, isTruncated, GGT, omegaSq, kmesh);
164 		case 2:
165 			return R.column(iDir).length() //truncated axis length
166 				* fSingularIntegralMinusSum(fSingular2D, isTruncated, GGT, omegaSq, kmesh);
167 		case 3:
168 			return fSingularIntegralMinusSum(fSingular3D, isTruncated, GGT, omegaSq, kmesh);
169 		default:
170 			return 0.; //Should never reach here (just to avoid compiler warnings)
171 	}
172 }
173 
174 //---------------- Spherical truncation for screened exchange --------------
erfcIntegrand(double r,void * params)175 double erfcIntegrand(double r, void* params)
176 {	double omega = *((double*)params);
177 	return erfc(omega * r);
178 }
truncatedErfcTilde(double G,double omega,double Rc)179 double truncatedErfcTilde(double G, double omega, double Rc)
180 {	if(G==0)
181 	{	double RcSq = Rc * Rc;
182 		double omegaSq = omega * omega;
183 		return M_PI * (2*Rc*(Rc - exp(-RcSq*omegaSq)/(omega*sqrt(M_PI)))
184 			+ (1./omegaSq - 2*RcSq) * erf(omega*Rc));
185 	}
186 	assert(G > 0.);
187 	const int nBisections = 20;
188 	gsl_integration_workspace* iWS = gsl_integration_workspace_alloc(nBisections);
189 	gsl_integration_qawo_table* qawoTable = gsl_integration_qawo_table_alloc(G, Rc, GSL_INTEG_SINE, nBisections);
190 	gsl_function f = { &erfcIntegrand, &omega };
191 	double result, err;
192 	gsl_integration_qawo(&f, 0., 0., 1e-12*std::max(0.1,G*Rc), nBisections, iWS, qawoTable, &result, &err);
193 	gsl_integration_qawo_table_free(qawoTable);
194 	gsl_integration_workspace_free(iWS);
195 	return (4*M_PI/G) * result;
196 }
197 
198 //---------------- Wigner-Seitz truncated exchange ---------------
199 
200 //Extract unit cell exchange kernel for a particualr k-point difference dk
201 //from supercell kernel in dataSuper, and store it in data
extractExchangeKernel_thread(size_t iStart,size_t iStop,const vector3<> & dk,const vector3<int> & S,const vector3<int> & Ssuper,const matrix3<int> & super,const double * dataSuper,double * data,const symmetricMatrix3<> * dataSuper_RRT,symmetricMatrix3<> * data_RRT)202 void extractExchangeKernel_thread(size_t iStart, size_t iStop, const vector3<>& dk,
203 	const vector3<int>& S, const vector3<int>& Ssuper, const matrix3<int>& super,
204 	const double* dataSuper, double* data, const symmetricMatrix3<>* dataSuper_RRT, symmetricMatrix3<>* data_RRT)
205 {	//Get the integer vector corresponding to dk in the supercell:
206 	double err;
207 	vector3<int> dkSuper = round(dk * super, &err); //note: transformation is by transpose of super
208 	assert(err < symmThreshold);
209 	//Collect data from supercell to unit-cell with k-point offset:
210 	THREAD_fullGspaceLoop
211 	(	vector3<int> iGsuper = iG * super + dkSuper;
212 		//Reduce to inversion-symmetry reduced index in supercell data:
213 		if(iGsuper[2]<0) iGsuper = -iGsuper;
214 		if(iGsuper[1]<0) iGsuper[1] += Ssuper[1];
215 		if(iGsuper[0]<0) iGsuper[0] += Ssuper[0];
216 		size_t iSuper = iGsuper[2] + (1+Ssuper[2]/2) * size_t(iGsuper[1] + Ssuper[1]*iGsuper[0]);
217 		data[i] = dataSuper[iSuper];
218 		if(data_RRT) data_RRT[i] = dataSuper_RRT[iSuper];
219 	)
220 }
221 
222 
223 //-------------------- class ExchangeEval -----------------------
224 
ExchangeEval(const GridInfo & gInfo,const CoulombParams & params,const Coulomb & coulomb,double omega)225 ExchangeEval::ExchangeEval(const GridInfo& gInfo, const CoulombParams& params, const Coulomb& coulomb, double omega)
226 : gInfo(gInfo), omega(omega), VcGamma(0), VcGamma_RRT(0), kernelData(0)
227 {
228 	if(!omega) logPrintf("\n-------- Setting up exchange kernel --------\n");
229 	else logPrintf("\n--- Setting up screened exchange kernel (omega = %lg) ---\n", omega);
230 
231 	//Obtain supercell parameters, and adjust for mesh embedding where necessary:
232 	assert(params.supercell);
233 	const matrix3<int>& super = params.supercell->super;
234 	const std::vector< vector3<> >& kmesh = params.supercell->kmesh;
235 	matrix3<> Rsuper = gInfo.R * super; //this could differ from supercell->Rsuper, because the embedding gInfo.R is scaled up from the original gInfo.R
236 
237 	//Check supercell:
238 	if(params.geometry != CoulombParams::Periodic)
239 	{	vector3<bool> isTruncated = params.isTruncated();
240 		//Make sure that the supercell and unit cell are identical in truncated directions:
241 		for(int k=0; k<3; k++)
242 			if(isTruncated[k])
243 			{	vector3<int> comb = super.column(k); //linear combinations of lattice-vectors in k^th supercell vector
244 				if((comb.length_squared()!=1) || (abs(comb[k])!=1))
245 				{	string dirName(3, '0'); dirName[k] = '1';
246 					die("More than one k-point along truncated lattice direction %s\n", dirName.c_str());
247 				}
248 			}
249 	}
250 
251 	//Select kernel mode:
252 	switch(params.exchangeRegularization)
253 	{	//G=0 correction based modes:
254 		case CoulombParams::None:
255 		case CoulombParams::AuxiliaryFunction:
256 		case CoulombParams::ProbeChargeEwald:
257 			switch(params.geometry)
258 			{	case CoulombParams::Periodic:
259 					kernelMode = PeriodicKernel; break;
260 				case CoulombParams::Slab:
261 					kernelMode = SlabKernel; break;
262 				case CoulombParams::Wire:
263 				case CoulombParams::Cylindrical:
264 					kernelMode = NumericalKernel; break;
265 				case CoulombParams::Spherical:
266 					kernelMode = SphericalKernel; break;
267 				case CoulombParams::Isolated:
268 					kernelMode = WignerSeitzGammaKernel; break;
269 			}
270 			if(params.geometry==CoulombParams::Periodic)
271 			{	if(params.exchangeRegularization==CoulombParams::AuxiliaryFunction)
272 					Citations::add("Auxiliary function method for exact exchange in arbitrary lattice systems", "P. Carrier et al., Phys. Rev. B 75, 205126 (2007)");
273 				if(params.exchangeRegularization==CoulombParams::ProbeChargeEwald)
274 					Citations::add("Probe-charge Ewald method for exact exchange", "J. Paier et al, J. Chem. Phys. 122, 234102 (2005)");
275 			}
276 			else if(params.exchangeRegularization!=CoulombParams::None)
277 				Citations::add("Exact exchange in reduced-dimensionality systems", wsTruncationPaper);
278 			break;
279 		//Truncation based modes:
280 		case CoulombParams::SphericalTruncated:
281 			Citations::add("Spherical truncated method for exact exchange", "J. Spencer et al, Phys. Rev. B 77, 193110 (2008)");
282 			kernelMode = SphericalKernel; break;
283 		case CoulombParams::WignerSeitzTruncated:
284 			Citations::add("Wigner-Seitz truncated method for exact exchange", wsTruncationPaper);
285 			kernelMode = NumericalKernel; break;
286 	}
287 
288 	//Perform G=0 handling if required
289 	double VzeroCorrection = 0.;
290 	matrix3<> VzeroCorrection_RRT; //optional lattice derivative
291 	double detRsuper = fabs(det(Rsuper));
292 
293 	if(params.exchangeRegularization==CoulombParams::AuxiliaryFunction)
294 	{	double omegaSq = omega*omega;
295 		vector3<bool> isTruncated = params.isTruncated();
296 		VzeroCorrection = getAuxiliaryFunctionVzero(gInfo.R, isTruncated, params.iDir, kmesh, omegaSq);
297 		//Compute lattice derivative if requied:
298 		if(params.computeStress)
299 		{	const double h = 1e-5; //finite difference size
300 			matrix3<> id(1,1,1); //identity
301 			std::vector<matrix3<>> strainBasis(6);
302 			for(int iDir=0; iDir<3; iDir++)
303 			{	//Tensions:
304 				strainBasis[iDir](iDir,iDir) = 1.;
305 				int jDir = (iDir+1)%3;
306 				int kDir = (iDir+2)%3;
307 				//Shears:
308 				strainBasis[iDir+3](jDir,kDir) = sqrt(0.5);
309 				strainBasis[iDir+3](kDir,jDir) = sqrt(0.5);
310 			}
311 			for(matrix3<> strainDir: strainBasis)
312 			{	double VzeroCorrection_dir = (0.5/h) *
313 					( getAuxiliaryFunctionVzero((id+h*strainDir)*gInfo.R, isTruncated, params.iDir, kmesh, omegaSq)
314 					- getAuxiliaryFunctionVzero((id-h*strainDir)*gInfo.R, isTruncated, params.iDir, kmesh, omegaSq) );
315 				VzeroCorrection_RRT += VzeroCorrection_dir * strainDir;
316 			}
317 		}
318 	}
319 
320 	if(params.exchangeRegularization==CoulombParams::ProbeChargeEwald)
321 	{	double Eperiodic = 0.; //Periodic interaction of a point charge in supercell geometry
322 		matrix3<> Eperiodic_RRT;
323 		if(omega) //Directly compute the periodic interaction in real space
324 		{	matrix3<> invRsuper = inv(Rsuper);
325 			vector3<bool> isTruncated = params.isTruncated();
326 			vector3<int> Nreal(0,0,0);
327 			double rMax = CoulombKernel::nSigmasPerWidth * sqrt(0.5)/omega;
328 			for(int k=0; k<3; k++)
329 				if(!isTruncated[k]) //no sampling along truncated directions
330 					Nreal[k] = 1+ceil(rMax * invRsuper.row(k).length());
331 			//Loop over neighbouring cells in real space:
332 			matrix3<> RsuperTRsuper = (~Rsuper)*Rsuper;
333 			vector3<int> iR; //integer cell number
334 			double omegaSq = omega*omega;
335 			for(iR[0]=-Nreal[0]; iR[0]<=Nreal[0]; iR[0]++)
336 				for(iR[1]=-Nreal[1]; iR[1]<=Nreal[1]; iR[1]++)
337 					for(iR[2]=-Nreal[2]; iR[2]<=Nreal[2]; iR[2]++)
338 					{	double rSq = RsuperTRsuper.metric_length_squared(iR);
339 						if(!rSq) continue; //exclude self-interaction
340 						double r = sqrt(rSq);
341 						Eperiodic += 0.5*erfc(omega*r)/r;
342 						if(params.computeStress)
343 						{	vector3<> rVec = Rsuper * iR;
344 							double minus_E_r_by_r = 0.5*(erfc(omega*r)/r + (2./sqrt(M_PI))*omega*exp(-omegaSq*rSq))/rSq;
345 							Eperiodic_RRT -= minus_E_r_by_r * outer(rVec,rVec);
346 						}
347 					}
348 		}
349 		else //Use the appropriate Ewald method
350 		{	std::vector<Atom> atoms(1, Atom(1., vector3<>())); //single unit point charge
351 			Eperiodic = coulomb.createEwald(Rsuper, 1)->energyAndGrad(atoms, params.computeStress ? &Eperiodic_RRT : 0);
352 			//Correction for G=0 difference between cylinder and wire truncation modes:
353 			if(params.geometry == CoulombParams::Cylindrical)
354 			{	double rho0 = ((CoulombCylindrical&)coulomb).Rc; //cylinder mode uses this as reference rho in logarithmic singularity
355 				double L = Rsuper.column(params.iDir).length();
356 				Eperiodic -= log(rho0) / L;
357 				if(params.computeStress)
358 				{	vector3<> zHat = (1./L) * Rsuper.column(params.iDir);
359 					Eperiodic_RRT += (log(rho0) / L) * outer(zHat,zHat);
360 				}
361 			}
362 		}
363 		VzeroCorrection = (-2.*detRsuper) * Eperiodic;
364 		if(params.computeStress)
365 			VzeroCorrection_RRT = (-2.*detRsuper) * (Eperiodic_RRT + matrix3<>(1,1,1)*Eperiodic);
366 	}
367 
368 	if(VzeroCorrection) logPrintf("Vxx(G=0) correction = %le\n", VzeroCorrection/detRsuper);
369 
370 	//Setup the kernel depending on the kernel mode:
371 	switch(kernelMode)
372 	{	case PeriodicKernel:
373 		{	Vzero = VzeroCorrection + (omega ? M_PI/(omega*omega) : 0.);
374 			Vzero_RRT = VzeroCorrection_RRT;
375 			logPrintf("3D periodic kernel with %s G=0.\n", VzeroCorrection ? "modified" : "unmodified");
376 			break;
377 		}
378 		case SphericalKernel:
379 		{	//Select the truncation radius:
380 			if(params.exchangeRegularization==CoulombParams::None)
381 				Rc = ((CoulombSpherical&)coulomb).Rc; //same radius as truncation for Hartree/Vloc
382 			else
383 				Rc = pow((3./(4*M_PI)) * detRsuper, 1./3); //from supercell volume
384 			logPrintf("Truncated on a sphere of radius %lg bohrs.\n", Rc);
385 			if(omega)
386 			{	//Initialize look-up table for erfc fourier transform
387 				//--- Determine end of range for look-up table:
388 				double Gmax = 0.0;
389 				vector3<int> c;
390 				for(c[0]=-1; c[0]<=1; c[0]+=2) for(c[1]=-1; c[1]<=1; c[1]+=2) for(c[2]=-1; c[2]<=1; c[2]+=2)
391 				{	vector3<> f; for(int k=0; k<3; k++) f[k] = c[k]*(gInfo.S[k]/2 + 1);
392 					double G = sqrt(gInfo.GGT.metric_length_squared(f));
393 					if(G>Gmax) Gmax=G;
394 				}
395 				//--- Compute samples and spline coefficients:
396 				const double dG = 0.01 * (2*M_PI)/Rc;
397 				int nSamples = int(ceil(Gmax/dG)) + 10;
398 				std::vector<double> samples(nSamples);
399 				for(int i=0; i<nSamples; i++)
400 					samples[i] = truncatedErfcTilde(dG*i, omega, Rc);
401 				Vzero = samples[0]; //Note: this mode will always have VzeroCorrection = 0.
402 				sphericalScreenedCoeff = ManagedArray<double>(QuinticSpline::getCoeff(samples));
403 				sphericalScreenedCalc.coeff = sphericalScreenedCoeff.dataPref();
404 				sphericalScreenedCalc.dGinv = 1.0/dG;
405 				sphericalScreenedCalc.nSamples = nSamples;
406 				sphericalScreenedCalc.Rc = Rc;
407 				sphericalScreenedCalc.erfcOmegaRc_4piBy3 = (4*M_PI/3) * erfc(omega*Rc);
408 			}
409 			else Vzero = (2*M_PI) * Rc*Rc;  //Note: this mode will always have VzeroCorrection = 0.
410 			//Optionally contribute G=0 contribution to stress:
411 			if(params.computeStress)
412 				Vzero_RRT = matrix3<>(1.,1.,1.) * (4*M_PI/3) * Rc*Rc * erfc(Rc * omega); //since Rc ~ detR ^ (1./3)
413 			break;
414 		}
415 		case SlabKernel:
416 		{	const vector3<>& Ri = gInfo.R.column(params.iDir);
417 			slabCalc.iDir = params.iDir;
418 			slabCalc.hlfL = 0.5 * Ri.length();
419 			slabCalc.zHatOuter = outer(Ri * (1./Ri.length()));
420 			logPrintf("Truncated on a slab of thickness %lg bohrs.\n", 2*slabCalc.hlfL);
421 			if(omega)
422 			{	//Initialize look-up tables (quintic splines) of the difference between the screened
423 				//and the analytic unscreened kernels as a function of Gplane for each iG[iDir]:
424 				//--- Determine end of range for look-up table:
425 				double GplaneMax = 0.0;
426 				vector3<int> c;
427 				for(c[0]=-1; c[0]<=1; c[0]+=2) for(c[1]=-1; c[1]<=1; c[1]+=2) for(c[2]=-1; c[2]<=1; c[2]+=2)
428 				{	vector3<> f; for(int k=0; k<3; k++) f[k] = c[k]*(gInfo.S[k]/2 + 1);
429 					f[params.iDir] = 0.; //project out truncated direction
430 					double Gplane = sqrt(gInfo.GGT.metric_length_squared(f));
431 					if(Gplane>GplaneMax) GplaneMax=Gplane;
432 				}
433 				//--- Compute samples for each Gplane:
434 				const double dG = 0.01 * (2*M_PI)/slabCalc.hlfL;
435 				int nSamples = int(ceil(GplaneMax/dG)) + 10;
436 				int nAxis = gInfo.S[params.iDir]/2+1;
437 				double hAxis = 2.*slabCalc.hlfL/gInfo.S[params.iDir];
438 				std::vector<std::vector<double>> samples(nAxis, std::vector<double>(nSamples));
439 				ManagedArray<double> fftMem; fftMem.init(nAxis);
440 				double* fftArr = fftMem.data();
441 				fftw_plan planDCT = fftw_plan_r2r_1d(nAxis, fftArr, fftArr, FFTW_REDFT00, FFTW_ESTIMATE);
442 				for(int iSample=0; iSample<nSamples; iSample++)
443 				{	double Gplane = iSample*dG;
444 					//Initialize the partial fourier transform of the smooth part (-erf(omega r)/r before truncation):
445 					for(int iAxis=0; iAxis<nAxis; iAxis++)
446 					{	double z = iAxis*hAxis;
447 						if(Gplane==0.)
448 							fftArr[iAxis] = (1./(omega*sqrt(M_PI))) * exp(-omega*omega*z*z) + z*erf(omega*z);
449 						else
450 						{	fftArr[iAxis] = (-0.5/Gplane) * ( (Gplane*z > 100.) ? 0.
451 								: ( exp(Gplane*z) * erfc(0.5*Gplane/omega + omega*z)
452 								 + exp(-Gplane*z) * erfc(0.5*Gplane/omega - omega*z) ) );
453 						}
454 					}
455 					//Transform the truncated direction to reciprocal space:
456 					fftw_execute(planDCT);
457 					for(int iAxis=0; iAxis<nAxis; iAxis++)
458 						samples[iAxis][iSample] = (2*M_PI * hAxis) * fftArr[iAxis];
459 				}
460 				fftw_destroy_plan(planDCT);
461 				//--- Initialize splines for each iAxis:
462 				Vzero = samples[0][0] + (-2.*M_PI)*pow(slabCalc.hlfL,2) + VzeroCorrection;
463 				for(int iSample=0; iSample<nSamples; iSample++) samples[0][iSample] *= (iSample * dG); //handle 1/Gplane sinbularity in the Gz=0 plane
464 				samples[0][0] = (-4.*M_PI)*slabCalc.hlfL; //replace regularized version with limit of singular one multiplied out as above
465 				std::vector<double> coeff, coeffSub;
466 				for(int iAxis=0; iAxis<nAxis; iAxis++)
467 				{	coeffSub = QuinticSpline::getCoeff(samples[iAxis]); //coefficients for a given iAxis
468 					coeff.insert(coeff.end(), coeffSub.begin(), coeffSub.end());
469 				}
470 				slabCoeff = ManagedArray<double>(coeff);
471 				slabCalc.coeff = slabCoeff.dataPref();
472 				slabCalc.dGinv = 1.0/dG;
473 				slabCalc.nSamples = nSamples;
474 				slabCalc.nCoeff = coeffSub.size();
475 			}
476 			else Vzero = (-2.*M_PI)*pow(slabCalc.hlfL,2) + VzeroCorrection; //Everything else handled analytically
477 			Vzero_RRT = (-4.*M_PI)*pow(slabCalc.hlfL,2)*matrix3<>(slabCalc.zHatOuter) + VzeroCorrection_RRT;
478 			break;
479 		}
480 		case WignerSeitzGammaKernel:
481 		{	if(abs(det(super)) != 1)
482 				die("Exact-exchange in Isolated geometry should be used only with a single k-point.\n");
483 			if(omega) //Create an omega-screened version (but gamma-point only):
484 			{	VcGamma = new RealKernel(gInfo);
485 				symmetricMatrix3<>* VcGamma_RRTdata = 0;
486 				if(params.computeStress)
487 				{	VcGamma_RRT = new ManagedArray<symmetricMatrix3<>>();
488 					VcGamma_RRT->init(gInfo.nG);
489 					VcGamma_RRTdata = VcGamma_RRT->data();
490 				}
491 				CoulombKernel(gInfo.R, gInfo.S, params.isTruncated(), omega).compute(VcGamma->data(), ((CoulombIsolated&)coulomb).ws, VcGamma_RRTdata);
492 			}
493 			else //use the same kernel as hartree/Vloc
494 			{	VcGamma = &((CoulombIsolated&)coulomb).Vc;
495 				if(params.computeStress)
496 					VcGamma_RRT = &((CoulombIsolated&)coulomb).Vc_RRT;
497 				logPrintf("Using previously initialized isolated coulomb kernel.\n");
498 			}
499 		}
500 		case NumericalKernel:
501 		{	//Create the kernel on the k-point supercell:
502 			vector3<bool> isTruncated = params.exchangeRegularization==CoulombParams::WignerSeitzTruncated
503 				? vector3<bool>(true, true, true) //All directions truncated for Wigner-Seitz truncated method
504 				: params.isTruncated(); //Same truncation geometry as Hartree/Vloc for G=0 based methods
505 			//--- set up supercell sample count:
506 			vector3<int> Ssuper(0,0,0), s; //loop over vertices of parallelopiped:
507 			for(s[0]=-1; s[0]<=1; s[0]+=2)
508 			for(s[1]=-1; s[1]<=1; s[1]+=2)
509 			for(s[2]=-1; s[2]<=1; s[2]+=2)
510 			{	vector3<> iG;
511 				for(int k=0; k<3; k++)
512 					iG[k] = s[k] * (gInfo.S[k]/2 + 1); //include margin for k-point
513 				vector3<> iGsuper = (~super) * iG;
514 				for(int k=0; k<3; k++)
515 				{	int Ssuper_k = 2*std::abs(iGsuper[k]);
516 					if(Ssuper_k > Ssuper[k])
517 						Ssuper[k] = Ssuper_k;
518 				}
519 			}
520 			logPrintf("Creating Wigner-Seitz truncated kernel on k-point supercell with sample count ");
521 			Ssuper.print(globalLog, " %d");
522 			//Note: No FFTs of dimensions Ssuper are required (so no need to make it fftSuitable())
523 			//--- create kernel on supercell:
524 			size_t nGsuper = Ssuper[0]*(Ssuper[1]*size_t(1+Ssuper[2]/2));
525 			double* dataSuper = new double[nGsuper];
526 			if(!dataSuper) die_alone("Out of memory. (need %.1lfGB for supercell exchange kernel)\n", nGsuper*1e-9*sizeof(double));
527 			symmetricMatrix3<>* dataSuper_RRT = 0;
528 			if(params.computeStress)
529 			{	dataSuper_RRT = new symmetricMatrix3<>[nGsuper];
530 				if(!dataSuper_RRT) die_alone("Out of memory. (need %.1lfGB for lattice derivatives of supercell exchange kernel)\n", 6*nGsuper*1e-9*sizeof(double));
531 			}
532 			WignerSeitz wsSuper(Rsuper);
533 			CoulombKernel(Rsuper, Ssuper, isTruncated, omega).compute(dataSuper, wsSuper, dataSuper_RRT);
534 			dataSuper[0] += VzeroCorrection; //For slab/wire geometry kernels in AuxiliaryFunction/ProbeChargeEwald methods
535 			if(params.computeStress) dataSuper_RRT[0] += VzeroCorrection_RRT;
536 
537 			//Construct k-point difference mesh:
538 			for(const vector3<>& kpoint: kmesh)
539 			{	vector3<> dk = kpoint - kmesh.front();
540 				for(int k=0; k<3; k++) dk[k] -= floor(dk[k] + 0.5); //reduce to fundamental zone:
541 				 dkArr.push_back(dk);
542 			}
543 
544 			//Split supercell kernel into one for each k-point difference:
545 			logPrintf("Splitting supercell kernel to unit-cell with k-points ... "); logFlush();
546 			size_t nKernelData = dkArr.size() * gInfo.nr;
547 			kernelData.init(nKernelData);
548 			if(params.computeStress)
549 				kernelData_RRT.init(nKernelData);
550 			for(size_t i=0; i<dkArr.size(); i++)
551 				threadLaunch(extractExchangeKernel_thread, gInfo.nr, dkArr[i],
552 					gInfo.S, Ssuper, super, dataSuper, kernelData.data() + i*gInfo.nr,
553 					dataSuper_RRT, params.computeStress ? kernelData_RRT.data() + i*gInfo.nr : 0);
554 			delete[] dataSuper;
555 			if(params.computeStress)
556 				delete[] dataSuper_RRT;
557 			logPrintf("Done.\n");
558 			break;
559 		}
560 	}
561 }
562 
~ExchangeEval()563 ExchangeEval::~ExchangeEval()
564 {
565 	if(VcGamma && omega) delete VcGamma;
566 	if(VcGamma_RRT && omega) delete VcGamma_RRT;
567 }
568 
569 
multTransformedKernel(complexScalarFieldTilde & X,const double * kernel,const vector3<int> & offset)570 void multTransformedKernel(complexScalarFieldTilde& X, const double* kernel, const vector3<int>& offset)
571 {	assert(X);
572 	if(!offset.length_squared())
573 		callPref(eblas_zmuld)(X->gInfo.nr, kernel, 1, X->dataPref(false), 1);
574 	else
575 		callPref(multTransformedKernel)(X->gInfo.S, kernel, X->dataPref(false), offset);
576 }
577 
578 
operator ()(complexScalarFieldTilde && in,vector3<> kDiff) const579 complexScalarFieldTilde ExchangeEval::operator()(complexScalarFieldTilde&& in, vector3<> kDiff) const
580 {
581 	#define CALL_exchangeAnalytic(calc) callPref(exchangeAnalytic)(gInfo.S, gInfo.GGT, calc, in->dataPref(false), kDiff, Vzero, symmThresholdSq)
582 	switch(kernelMode)
583 	{	case PeriodicKernel:
584 		{	if(omega) CALL_exchangeAnalytic(ExchangePeriodicScreened_calc(omega));
585 			else CALL_exchangeAnalytic(ExchangePeriodic_calc());
586 			break;
587 		}
588 		case SphericalKernel:
589 		{	if(omega) CALL_exchangeAnalytic(sphericalScreenedCalc);
590 			else CALL_exchangeAnalytic(ExchangeSpherical_calc(Rc));
591 			break;
592 		}
593 		case SlabKernel:
594 		{	CALL_exchangeAnalytic(slabCalc);
595 			break;
596 		}
597 		case WignerSeitzGammaKernel:
598 		{	assert(kDiff.length_squared() < symmThresholdSq); //gamma-point only
599 			callPref(multRealKernel)(gInfo.S, VcGamma->dataPref(), in->dataPref(false));
600 			break;
601 		}
602 		case NumericalKernel:
603 		{	//Find the appropriate kDiff:
604 			bool kDiffFound = false;
605 			for(unsigned ik=0; ik<dkArr.size(); ik++)
606 				if(circDistanceSquared(dkArr[ik], kDiff) < symmThresholdSq)
607 				{	//Find the integer offset, if any:
608 					double err;
609 					vector3<int> offset = round(dkArr[ik] - kDiff, &err);
610 					assert(err < symmThreshold);
611 					//Multiply kernel:
612 					multTransformedKernel(in, kernelData.dataPref() + gInfo.nr * ik, offset);
613 					kDiffFound = true;
614 					break;
615 				}
616 			assert(kDiffFound);
617 			break;
618 		}
619 	}
620 	#undef CALL_exchangeAnalytic
621 	return in;
622 }
623 
latticeGradient(const complexScalarFieldTilde & X,vector3<> kDiff) const624 matrix3<> ExchangeEval::latticeGradient(const complexScalarFieldTilde& X, vector3<> kDiff) const
625 {
626 	#define RETURN_exchangeAnalyticStress(calc) \
627 	{	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nr, isGpuEnabled()); \
628 		callPref(exchangeAnalyticStress)(gInfo.S, gInfo.G, calc, X->dataPref(), result.dataPref(), kDiff, symmThresholdSq); \
629 		matrix3<> resultSum = callPref(eblas_sum)(gInfo.nr, result.dataPref()); \
630 		if(kDiff.length_squared() < symmThresholdSq) resultSum += Vzero_RRT * X->getGzero().norm(); \
631 		return gInfo.detR * resultSum; \
632 	}
633 	switch(kernelMode)
634 	{	case PeriodicKernel:
635 		{	if(omega) RETURN_exchangeAnalyticStress(ExchangePeriodicScreened_calc(omega))
636 			else RETURN_exchangeAnalyticStress(ExchangePeriodic_calc())
637 		}
638 		case SphericalKernel:
639 		{	if(omega) RETURN_exchangeAnalyticStress(sphericalScreenedCalc)
640 			else RETURN_exchangeAnalyticStress(ExchangeSpherical_calc(Rc))
641 		}
642 		case SlabKernel:
643 			RETURN_exchangeAnalyticStress(slabCalc)
644 		case WignerSeitzGammaKernel:
645 		{	assert(kDiff.length_squared() < symmThresholdSq); //gamma-point only
646 			ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nr, isGpuEnabled());
647 			callPref(realKernelStress)(gInfo.S, VcGamma_RRT->dataPref(), X->dataPref(), result.dataPref());
648 			matrix3<> resultSum = callPref(eblas_sum)(gInfo.nr, result.dataPref());
649 			return gInfo.detR * resultSum;
650 		}
651 		case NumericalKernel:
652 		{	//Find kernel with the appropriate kDiff:
653 			for(unsigned ik=0; ik<dkArr.size(); ik++)
654 				if(circDistanceSquared(dkArr[ik], kDiff) < symmThresholdSq)
655 				{	//Find the integer offset, if any:
656 					double err;
657 					vector3<int> offset = round(dkArr[ik] - kDiff, &err);
658 					assert(err < symmThreshold);
659 					//Compute stress:
660 					ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nr, isGpuEnabled());
661 					callPref(transformedKernelStress)(gInfo.S, kernelData_RRT.dataPref() + gInfo.nr * ik, X->dataPref(), result.dataPref(), offset);
662 					matrix3<> resultSum = callPref(eblas_sum)(gInfo.nr, result.dataPref());
663 					return gInfo.detR * resultSum;
664 				}
665 			assert(!"Encountered invalid kDiff");
666 			return matrix3<>();
667 		}
668 		default:
669 			die_alone("Lattice gradient not yet implemented for this kernel mode.");
670 	}
671 	#undef RETURN_exchangeAnalyticStress
672 	return matrix3<>(); //to avoid compiler warnings
673 }
674