1 /*-------------------------------------------------------------------
2 Copyright 2012 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #include <core/CoulombWire.h>
21 #include <core/CoulombKernel.h>
22 #include <core/Coulomb_internal.h>
23 #include <core/Operators.h>
24 #include <core/Util.h>
25 #include <core/Spline.h>
26 #include <core/LoopMacros.h>
27 #include <gsl/gsl_sf.h>
28 
29 //Check orthogonality and return lattice direction name
checkOrthogonality(const GridInfo & gInfo,int iDir)30 string checkOrthogonality(const GridInfo& gInfo, int iDir)
31 {	string dirName(3, '0'); dirName[iDir] = '1';
32 	if((!WignerSeitz::isOrthogonal(gInfo.R.column(iDir),gInfo.R.column((iDir+1)%3)))
33 	|| (!WignerSeitz::isOrthogonal(gInfo.R.column(iDir),gInfo.R.column((iDir+2)%3))) )
34 		die("Lattice direction %s is not perpendicular to the other two basis vectors.\n", dirName.c_str());
35 	return dirName;
36 }
37 
38 
39 //--------------- class Cbar ----------
40 
Cbar()41 Cbar::Cbar() { iWS = gsl_integration_workspace_alloc(maxIntervals); }
~Cbar()42 Cbar::~Cbar() { gsl_integration_workspace_free(iWS); }
43 
44 //Compute Cbar_k^sigma(rho)
operator ()(double k,double sigma,double rho,double rho0)45 double Cbar::operator()(double k, double sigma, double rho, double rho0)
46 {	assert(k >= 0.);
47 	assert(sigma > 0.);
48 	assert(rho >= 0.);
49 	if(k == 0) //Use closed form in terms of the exponential integral function:
50 	{	const double xMax = 700.; //threshold (with some margin) to underflow in expint_E1
51 		double hlfSigmaInvSq = 0.5/(sigma*sigma);
52 		double x = hlfSigmaInvSq*rho*rho;
53 		double x0 = hlfSigmaInvSq*rho0*rho0;
54 		if(x < 3.5e-3) return (M_EULER + log(x0)) - x*(1. - x*(1./4 - x*(1./18 - x*(1./96))));
55 		else return -2.*log(rho/rho0) - (x>xMax ? 0. : gsl_sf_expint_E1(x));
56 	}
57 	else
58 	{	double R = rho/sigma;
59 		double K = k*sigma;
60 		if(R*(R-2*K) > 100.)
61 			return 2. * gsl_sf_bessel_K0_scaled(k*rho) * exp(-k*rho);
62 		std::pair<double,double> params;
63 		gsl_function f; f.params = &params;
64 		if(R < 1.)
65 		{	params.first  = R;
66 			params.second = K;
67 			f.function = &integrandSmallRho;
68 		}
69 		else
70 		{	params.first  = R*R;
71 			params.second = K*R;
72 			f.function = &integrandLargeRho;
73 		}
74 		double result, err;
75 		gsl_integration_qagiu(&f, 0., 0., 1e-13, maxIntervals, iWS, &result, &err);
76 		return 2 * exp(-0.5*(K*K + R*R)) * result;
77 	}
78 }
79 
80 //Integrand for rho < sigma
integrandSmallRho(double t,void * params)81 double Cbar::integrandSmallRho(double t,  void* params)
82 {	const std::pair<double,double>& p = *((std::pair<double,double>*)params);
83 	const double& R = p.first; //R = (rho/sigma)
84 	const double& K = p.second; //K = (k*sigma)
85 	return t * exp(-0.5*t*t + t*(R-K)) * gsl_sf_bessel_I0_scaled(R*t) * gsl_sf_bessel_K0_scaled(K*t);
86 }
87 
88 //Integrand for rho > sigma
integrandLargeRho(double t,void * params)89 double Cbar::integrandLargeRho(double t, void* params)
90 {	const std::pair<double,double>& p = *((std::pair<double,double>*)params);
91 	const double& Rsq = p.first; //Rsq = (rho/sigma)^2
92 	const double& KR  = p.second; //KR = (k*sigma)*(rho/sigma) = k*rho
93 	return t * Rsq * exp(-0.5*Rsq*t*t + t*(Rsq-KR)) * gsl_sf_bessel_I0_scaled(Rsq*t) * gsl_sf_bessel_K0_scaled(KR*t);
94 }
95 
96 
97 //--------------- class Cbar_k_sigma ----------
98 
Cbar_k_sigma(double k,double sigma,double rhoMax,double rho0,bool prime)99 Cbar_k_sigma::Cbar_k_sigma(double k, double sigma, double rhoMax, double rho0, bool prime)
100 {	assert(rhoMax > 0.);
101 	//Pick grid and initialize sample values:
102 	double drho = 0.03*sigma; //With 5th order splines, this guarantees rel error ~ 1e-14 typical, 1e-12 max
103 	drhoInv = 1./drho;
104 	isLog = (k != 0.); //When k!=0, samples are positive and interpolate on the logarithm
105 	if(prime) assert(k != 0.);
106 	std::vector<double> x(size_t(drhoInv*rhoMax)+10);
107 	Cbar cbar;
108 	for(size_t i=0; i<x.size(); i++)
109 	{	double c = 0.;
110 		if(prime) //compute -dcbar/dk by finite difference
111 		{	double dk = k*1e-5;
112 			c = (0.5/dk) * (cbar(k-dk, sigma, i*drho, rho0) - cbar(k+dk, sigma, i*drho, rho0));
113 		}
114 		else c = cbar(k, sigma, i*drho, rho0);
115 
116 		if(isLog) x[i] = (c>0 ? log(c) : (i ? x[i-1] : log(DBL_MIN)));
117 		else x[i] = c;
118 	}
119 	coeff = QuinticSpline::getCoeff(x);
120 }
121 
122 
123 //! 1D Ewald sum
124 class EwaldWire : public Ewald
125 {
126 	matrix3<> R, G, RTR, GGT; //!< Lattice vectors, reciprocal lattice vectors and corresponding metrics
127 	int iDir; //!< truncated direction
128 	const WignerSeitz& ws; //!< Wigner-Seitz cell
129 	double ionMargin; //!< Safety margin around ions
130 	double Rc; //!< cutoff radius for spherical mode (used for ion overlap checks only)
131 
132 	double sigma; //!< gaussian width for Ewald sums
133 	vector3<int> Nreal; //!< max unit cell indices for real-space sum
134 	vector3<int> Nrecip; //!< max unit cell indices for reciprocal-space sum
135 
136 	std::vector<std::shared_ptr<Cbar_k_sigma>> cbar_k_sigma;
137 	std::vector<std::shared_ptr<Cbar_k_sigma>> minus_cbar_k_sigma_k; //!< -d(cbar_k_sigma)/dk for stress calculation
138 
139 public:
EwaldWire(const matrix3<> & R,int iDir,const WignerSeitz & ws,double ionMargin,double Rc=0.,double rho0=1.)140 	EwaldWire(const matrix3<>& R, int iDir, const WignerSeitz& ws, double ionMargin, double Rc=0., double rho0=1.)
141 	: R(R), G((2*M_PI)*inv(R)), RTR((~R)*R), GGT(G*(~G)), iDir(iDir), ws(ws), ionMargin(ionMargin), Rc(Rc)
142 	{	logPrintf("\n---------- Setting up 1D ewald sum ----------\n");
143 		//Determine optimum gaussian width for 1D Ewald sums:
144 		// From below, the number of reciprocal cells ~ |R.column[iDir]|
145 		//    and number of real space cells ~ |G.row[iDir]|
146 		// including the fact that a term in the reciprocal space sum
147 		// costs roughly 10 times as much as that in the real space sum
148 		sigma = sqrt(10.*R.column(iDir).length() / G.row(iDir).length());
149 		logPrintf("Optimum gaussian width for ewald sums = %lf bohr.\n", sigma);
150 
151 		//Carry real space sums to Rmax = 10 sigma and Gmax = 10/sigma
152 		//This leads to relative errors ~ 1e-22 in both sums, well within double precision limits
153 		for(int k=0; k<3; k++)
154 		{	Nreal[k]  = (k!=iDir) ? 0 : 1+ceil(CoulombKernel::nSigmasPerWidth * G.row(k).length() * sigma / (2*M_PI));
155 			Nrecip[k] = (k!=iDir) ? 0 : 1+ceil(CoulombKernel::nSigmasPerWidth * R.column(k).length() / (2*M_PI*sigma));
156 		}
157 		logPrintf("Real space sums over %d unit cells with max indices ", 2*Nreal[iDir]+1);
158 		Nreal.print(globalLog, " %d ");
159 		logPrintf("Reciprocal space sums over %d terms with max indices ", Nrecip[iDir]+1);
160 		Nrecip.print(globalLog, " %d ");
161 
162 		//Initialize Cbar_k^sigma look-up tables:
163 		cbar_k_sigma.resize(Nrecip[iDir]+1);
164 		minus_cbar_k_sigma_k.resize(Nrecip[iDir]+1);
165 		vector3<int> iG(0,0,0);
166 		double rhoMax = ws.circumRadius(iDir);
167 		for(iG[iDir]=0; iG[iDir]<=Nrecip[iDir]; iG[iDir]++)
168 		{	double k = sqrt(GGT.metric_length_squared(iG));
169 			cbar_k_sigma[iG[iDir]] = std::make_shared<Cbar_k_sigma>(k, sigma, rhoMax, rho0);
170 			if(k) minus_cbar_k_sigma_k[iG[iDir]] = std::make_shared<Cbar_k_sigma>(k, sigma, rhoMax, rho0, true);
171 		}
172 	}
173 
energyAndGrad(std::vector<Atom> & atoms,matrix3<> * E_RRTptr) const174 	double energyAndGrad(std::vector<Atom>& atoms, matrix3<>* E_RRTptr) const
175 	{	if(!atoms.size()) return 0.;
176 		double eta = sqrt(0.5)/sigma, etaSq=eta*eta;
177 		matrix3<> E_RRT; //stress * volume (computed if E_RRTptr non-null)
178 		double E_RRTzz = 0.; //stress contribution along periodic direction
179 
180 		//Position independent terms: (Self-energy correction)
181 		double ZsqTot = 0.;
182 		for(const Atom& a: atoms)
183 			ZsqTot += a.Z * a.Z;
184 		double E = -0.5 * ZsqTot * eta * (2./sqrt(M_PI));
185 
186 		//Reduce positions to first unit cell:
187 		//Shift all points in the truncated directions into the 2D Wigner-Seitz cell
188 		//centered on one of the atoms; choice of this atom is irrelevant if every atom
189 		//lies in the WS cell of the other with a consistent translation:
190 		vector3<> pos0 = atoms[0].pos;
191 		for(Atom& a: atoms)
192 			a.pos = pos0 + ws.restrict(a.pos - pos0);
193 		if(not ZsqTot) return 0.;
194 
195 		//Real space sum:
196 		vector3<int> iR(0,0,0); //integer cell number
197 		for(const Atom& a2: atoms)
198 			for(Atom& a1: atoms)
199 				for(iR[iDir]=-Nreal[iDir]; iR[iDir]<=Nreal[iDir]; iR[iDir]++)
200 				{	vector3<> x = iR + (a1.pos - a2.pos);
201 					double rSq = RTR.metric_length_squared(x);
202 					if(!rSq) continue; //exclude self-interaction
203 					double r = sqrt(rSq);
204 					E += 0.5 * a1.Z * a2.Z * erfc(eta*r)/r;
205 					double minus_E_r_by_r = a1.Z * a2.Z * (erfc(eta*r)/r + (2./sqrt(M_PI))*eta*exp(-etaSq*rSq))/rSq;
206 					a1.force += (RTR * x) * minus_E_r_by_r;
207 					if(E_RRTptr)
208 					{	vector3<> rVec = R * x;
209 						E_RRT -= (0.5*minus_E_r_by_r) * outer(rVec,rVec);
210 					}
211 				}
212 
213 		//Reciprocal space sum:
214 		double Lz = sqrt(RTR(iDir,iDir));
215 		double Gzz = (2*M_PI)/Lz;
216 		double volPrefac = 0.5 / Lz;
217 		for(unsigned i1=0; i1<atoms.size(); i1++)
218 		{	Atom& a1 = atoms[i1];
219 			for(unsigned i2=0; i2<=i1; i2++)
220 			{	Atom& a2 = atoms[i2];
221 				double prefac = volPrefac * a1.Z * a2.Z * (i1==i2 ? 1 : 2);
222 				vector3<> r12 = a1.pos - a2.pos;
223 				vector3<> rho12vec = r12; rho12vec[iDir] = 0.; //projected to truncation plane
224 				double rho12 = sqrt(RTR.metric_length_squared(rho12vec));
225 				if(Rc)
226 				{	if(rho12 >= Rc - ionMargin)
227 						die("Atoms %d and %d are separated by rho = %lg >= Rc-ionMargin = %lg bohrs.\n" ionMarginMessage, i1+1, i2+1, rho12, Rc-ionMargin);
228 				}
229 				else
230 				{	if(ws.boundaryDistance(rho12vec, iDir) <= ionMargin)
231 						die("Separation between atoms %d and %d lies within the margin of %lg bohrs from the Wigner-Seitz boundary.\n" ionMarginMessage, i1+1, i2+1, ionMargin);
232 				}
233 				double E12 = 0.; vector3<> E12_r12(0.,0.,0.); //energy and gradient from this pair
234 				double E12_rho = 0.; //derivative w.r.t rho12 collected for stress calculation
235 				vector3<int> iG(0,0,0); //integer reciprocal cell number
236 				int& iGz = iG[iDir]; //only iG[iDir] will be non-zero below
237 				for(iGz=0; iGz<=Nrecip[iDir]; iGz++)
238 				{	//1D structure factor term and derivative
239 					double c, s; sincos((2*M_PI)*dot(iG,r12), &s, &c);
240 					if(iGz) { c *= 2.; s *= 2.; } //include contribution from -iGz
241 					//Contribution from truncated directions:
242 					double rhoTerm = cbar_k_sigma[iGz]->value(rho12);
243 					double rhoTermPrime = cbar_k_sigma[iGz]->deriv(rho12);
244 					//Update energy and forces:
245 					E12 += prefac * c * rhoTerm;
246 					E12_r12 += (prefac * -s * rhoTerm * (2*M_PI)) * iG
247 						+ (prefac * c * rhoTermPrime * (rho12 ? 1./rho12 : 0.)) * (RTR * rho12vec);
248 					//Accumulate stresses:
249 					if(E_RRTptr)
250 					{	E12_rho += prefac * c * rhoTermPrime;
251 						if(iGz) E_RRTzz += prefac * c * minus_cbar_k_sigma_k[iGz]->value(rho12) * (iGz*Gzz);
252 					}
253 				}
254 				E += E12;
255 				a1.force -= E12_r12;
256 				a2.force += E12_r12;
257 				if(E_RRTptr)
258 				{	if(rho12)
259 					{	vector3<> rho12cart = R * rho12vec;
260 						E_RRT += (E12_rho/rho12) * outer(rho12cart,rho12cart);
261 					}
262 					E_RRTzz -= E12; //propagated through volPrefac
263 				}
264 			}
265 		}
266 
267 		if(E_RRTptr)
268 		{	//Add zz contribution collected separately above into E_RRT:
269 			vector3<> zHat = R.column(iDir); zHat *= (1./zHat.length());
270 			E_RRT += E_RRTzz * outer(zHat,zHat);
271 			//Accumulate to total stress:
272 			*E_RRTptr += E_RRT;
273 		}
274 		return E;
275 	}
276 };
277 
278 
279 
CoulombWire(const GridInfo & gInfoOrig,const CoulombParams & params)280 CoulombWire::CoulombWire(const GridInfo& gInfoOrig, const CoulombParams& params)
281 : Coulomb(gInfoOrig, params), ws(gInfo.R), Vc(gInfo)
282 {	//Check orthogonality
283 	string dirName = checkOrthogonality(gInfo, params.iDir);
284 	//Compute kernel (and optionally its lattice derivative):
285 	symmetricMatrix3<>* Vc_RRTdata = 0;
286 	if(params.computeStress)
287 	{	Vc_RRT.init(gInfo.nG);
288 		Vc_RRTdata = Vc_RRT.data();
289 	}
290 	CoulombKernel(gInfo.R, gInfo.S, params.isTruncated()).compute(Vc.data(), ws, Vc_RRTdata);
291 	initExchangeEval();
292 }
293 
apply(ScalarFieldTilde && in) const294 ScalarFieldTilde CoulombWire::apply(ScalarFieldTilde&& in) const
295 {	return Vc * in;
296 }
297 
createEwald(matrix3<> R,size_t nAtoms) const298 std::shared_ptr<Ewald> CoulombWire::createEwald(matrix3<> R, size_t nAtoms) const
299 {	return std::make_shared<EwaldWire>(R, params.iDir, ws, params.ionMargin);
300 }
301 
getLatticeGradient(const ScalarFieldTilde & X,const ScalarFieldTilde & Y) const302 matrix3<> CoulombWire::getLatticeGradient(const ScalarFieldTilde& X, const ScalarFieldTilde& Y) const
303 {	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
304 	callPref(coulombNumericalStress)(gInfo.S, gInfo.GGT, Vc_RRT.dataPref(), X->dataPref(), Y->dataPref(), result.dataPref());
305 	matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
306 	return gInfo.detR * resultSum;
307 }
308 
309 
310 
311 //----------------- class CoulombCylindrical ---------------------
312 
setVcylindrical(size_t iStart,size_t iStop,vector3<int> S,const matrix3<> GGT,int iDir,double Rc,double * Vc,symmetricMatrix3<> * Vc_RRT)313 void setVcylindrical(size_t iStart, size_t iStop, vector3<int> S, const matrix3<> GGT, int iDir, double Rc, double* Vc, symmetricMatrix3<>* Vc_RRT)
314 {	THREAD_halfGspaceLoop
315 	(	double Gsq = GGT.metric_length_squared(iG);
316 		double GaxisSq = GGT(iDir,iDir) * iG[iDir] * iG[iDir];
317 		double GplaneSq = Gsq - GaxisSq;
318 		double RGaxis = GaxisSq>0. ? Rc*sqrt(GaxisSq) : 0.; //safe sqrt to prevent NaN from roundoff errors
319 		double RGplane = GplaneSq>0. ? Rc*sqrt(GplaneSq) : 0.; //safe sqrt to prevent NaN from roundoff errors
320 		double minus_Vc_Gplane_by_Gplane = 0.; //for stress
321 		double minus_Vc_Gaxis_by_Gaxis = 0.;
322 		double Vc0 = 4*M_PI/Gsq;
323 		double J0 = gsl_sf_bessel_J0(RGplane);
324 		double J1 = gsl_sf_bessel_J1(RGplane);
325 
326 		if(iG[iDir])
327 		{	double K0 = gsl_sf_bessel_K0(RGaxis);
328 			double K1 = gsl_sf_bessel_K1(RGaxis);
329 			Vc[i] = Vc0 * (1. + (RGplane*J1)*K0 - J0*(K1*RGaxis));
330 			if(Vc_RRT)
331 			{	double J0prime = -J1;
332 				double xJ1prime = J0*RGplane;
333 				double K0prime = -K1;
334 				double xK1prime = -K0*RGaxis;
335 				if(GplaneSq)
336 					minus_Vc_Gplane_by_Gplane = 2.*Vc[i]/Gsq - (RGplane/GplaneSq)*Vc0*(xJ1prime*K0 - J0prime*(K1*RGaxis));
337 				minus_Vc_Gaxis_by_Gaxis = 2.*Vc[i]/Gsq - (RGaxis/GaxisSq)*Vc0*((RGplane*J1)*K0prime - J0*xK1prime);
338 			}
339 		}
340 		else if(GplaneSq)
341 		{	Vc[i] = Vc0*(1.-J0);
342 			if(Vc_RRT)
343 				minus_Vc_Gplane_by_Gplane = (Vc0/GplaneSq) * (2.*(1.-J0) - RGplane*J1);
344 		}
345 		else Vc[i] = M_PI*Rc*Rc;
346 
347 		//Set lattice derivative of kernel:
348 		if(Vc_RRT)
349 		{	vector3<> iGplane(iG); iGplane[iDir]=0;
350 			Vc_RRT[i] = minus_Vc_Gplane_by_Gplane * outer(iGplane);
351 			((double*)(Vc_RRT+i))[iDir] = minus_Vc_Gaxis_by_Gaxis * iG[iDir]*iG[iDir];
352 		}
353 	)
354 }
355 
356 
CoulombCylindrical(const GridInfo & gInfoOrig,const CoulombParams & params)357 CoulombCylindrical::CoulombCylindrical(const GridInfo& gInfoOrig, const CoulombParams& params)
358 : Coulomb(gInfoOrig, params), ws(gInfo.R), Rc(params.Rc), Vc(gInfo)
359 {	//Check orthogonality:
360 	string dirName = checkOrthogonality(gInfo, params.iDir);
361 	//Check the truncation radius:
362 	double RcMax = ws.inRadius(params.iDir);
363 	if(Rc > RcMax)
364 		die("Cylindrical truncation radius %lg exceeds 2D Wigner-Seitz cell in-radius of %lg bohrs.\n", Rc, RcMax);
365 	if(!Rc) Rc = RcMax;
366 	//Compute kernel (and optionally its lattice derivative):
367 	symmetricMatrix3<>* Vc_RRTdata = 0;
368 	if(params.computeStress)
369 	{	Vc_RRT.init(gInfo.nG);
370 		Vc_RRTdata = Vc_RRT.data();
371 	}
372 	threadLaunch(setVcylindrical, gInfo.nG, gInfo.S, gInfo.GGT, params.iDir, Rc, Vc.data(), Vc_RRTdata);
373 	logPrintf("Initialized cylindrical truncation of radius %lg bohrs with axis along lattice direction %s\n", Rc, dirName.c_str());
374 	initExchangeEval();
375 }
376 
apply(ScalarFieldTilde && in) const377 ScalarFieldTilde CoulombCylindrical::apply(ScalarFieldTilde&& in) const
378 {	return Vc * in;
379 }
380 
createEwald(matrix3<> R,size_t nAtoms) const381 std::shared_ptr<Ewald> CoulombCylindrical::createEwald(matrix3<> R, size_t nAtoms) const
382 {	return std::make_shared<EwaldWire>(R, params.iDir, ws, params.ionMargin, Rc, Rc);
383 }
384 
getLatticeGradient(const ScalarFieldTilde & X,const ScalarFieldTilde & Y) const385 matrix3<> CoulombCylindrical::getLatticeGradient(const ScalarFieldTilde& X, const ScalarFieldTilde& Y) const
386 {	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
387 	callPref(coulombNumericalStress)(gInfo.S, gInfo.GGT, Vc_RRT.dataPref(), X->dataPref(), Y->dataPref(), result.dataPref());
388 	matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
389 	return gInfo.detR * (gInfo.GT * resultSum * gInfo.G);
390 }
391 
392