1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3 
4 This file is part of JDFTx.
5 
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19 
20 #include <fluid/MixedFMT.h>
21 #include <fluid/MixedFMT_internal.h>
22 #include <core/VectorField.h>
23 
24 //Compute the tensor weighted density (threaded/gpu):
// CPU thread worker for tensorKernel(): loops over the half-G-space indices
// in [iStart,iStop) (via the THREAD_halfGspaceLoop macro) and delegates the
// per-point computation of the tensor weighted density to tensorKernel_calc
// (declared in MixedFMT_internal.h, shared with the GPU kernel).
// S: grid sample counts; G: reciprocal-space matrix forwarded to the per-point calc.
// nTilde: input scalar density (reciprocal space); mTilde: output tensor components.
inline void tensorKernel_sub(size_t iStart, size_t iStop, vector3<int> S, const matrix3<> G,
	const complex* nTilde, tensor3<complex*> mTilde)
{	THREAD_halfGspaceLoop( tensorKernel_calc(i, iG, IS_NYQUIST, G, nTilde, mTilde); )
}
29 #ifdef GPU_ENABLED
30 void tensorKernel_gpu(vector3<int> S, const matrix3<> G,
31 	const complex* nTilde, tensor3<complex*> mTilde);
32 #endif
tensorKernel(const ScalarFieldTilde & nTilde)33 TensorFieldTilde tensorKernel(const ScalarFieldTilde& nTilde)
34 {	const GridInfo& gInfo = nTilde->gInfo;
35 	TensorFieldTilde mTilde(gInfo, isGpuEnabled());
36 	#ifdef GPU_ENABLED
37 	tensorKernel_gpu(gInfo.S, gInfo.G, nTilde->dataGpu(), mTilde.dataGpu());
38 	#else
39 	threadLaunch(tensorKernel_sub, gInfo.nG, gInfo.S, gInfo.G, nTilde->data(), mTilde.data());
40 	#endif
41 	return mTilde;
42 }
43 
44 //Propagate gradient w.r.t tensor weighted density (threaded/gpu):
// CPU thread worker for tensorKernel_grad(): loops over the half-G-space
// indices in [iStart,iStop) (via THREAD_halfGspaceLoop) and delegates the
// per-point adjoint/gradient propagation to tensorKernel_grad_calc
// (declared in MixedFMT_internal.h, shared with the GPU kernel).
// grad_mTilde: gradient w.r.t. the tensor weighted density (input);
// grad_nTilde: accumulated gradient w.r.t. the scalar density (output).
inline void tensorKernel_grad_sub(size_t iStart, size_t iStop, vector3<int> S, const matrix3<> G,
	tensor3<const complex*> grad_mTilde, complex* grad_nTilde)
{	THREAD_halfGspaceLoop( tensorKernel_grad_calc(i, iG, IS_NYQUIST, G, grad_mTilde, grad_nTilde); )
}
49 #ifdef GPU_ENABLED
50 void tensorKernel_grad_gpu(vector3<int> S, const matrix3<> G,
51 	tensor3<const complex*> grad_mTilde, complex* grad_nTilde);
52 #endif
tensorKernel_grad(const TensorFieldTilde & grad_mTilde)53 ScalarFieldTilde tensorKernel_grad(const TensorFieldTilde& grad_mTilde)
54 {	const GridInfo& gInfo = grad_mTilde[0]->gInfo;
55 	ScalarFieldTilde grad_nTilde(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
56 	#ifdef GPU_ENABLED
57 	tensorKernel_grad_gpu(gInfo.S, gInfo.G, grad_mTilde.const_dataGpu(), grad_nTilde->dataGpu());
58 	#else
59 	threadLaunch(tensorKernel_grad_sub, gInfo.nG, gInfo.S, gInfo.G, grad_mTilde.const_data(), grad_nTilde->data());
60 	#endif
61 	return grad_nTilde;
62 }
63 
64 
65 #ifdef GPU_ENABLED
66 void phiFMT_gpu(int N, double* phiArr,
67 	const double *n0arr, const double *n1arr, const double *n2arr, const double *n3arr,
68 	vector3<const double*> n1vArr, vector3<const double*> n2vArr, tensor3<const double*> n2mArr,
69 	double *grad_n0arr, double *grad_n1arr, double *grad_n2arr, double *grad_n3arr,
70 	vector3<double*> grad_n1vArr, vector3<double*> grad_n2vArr, tensor3<double*> grad_n2mArr);
71 #endif
72 
73 
// Fundamental-measure-theory free energy functional evaluated from the
// weighted densities, with gradient accumulation.
// Inputs: scalar weighted densities n0,n1,n2 (real space) and n3tilde,
//   n1vTilde, n2mTilde (reciprocal space); the vector densities n1v, n2v and
//   tensor density n2m are derived below from the reciprocal-space inputs.
// Outputs: the per-point free energy density summed over the grid (times dV),
//   with gradients ACCUMULATED (+=/-=) into the grad_* arguments.
double PhiFMT(const ScalarField& n0, const ScalarField& n1, const ScalarField& n2,
	const ScalarFieldTilde& n3tilde, const ScalarFieldTilde& n1vTilde, const ScalarFieldTilde& n2mTilde,
	ScalarField& grad_n0, ScalarField& grad_n1, ScalarField& grad_n2,
	ScalarFieldTilde& grad_n3tilde, ScalarFieldTilde& grad_n1vTilde, ScalarFieldTilde& grad_n2mTilde)
{
	const GridInfo& gInfo = n0->gInfo;
	//Derive the remaining real-space weighted densities from the reciprocal-space inputs:
	ScalarField n3 = I(n3tilde);               //real-space n3 via inverse transform
	VectorField n1v = I(gradient(n1vTilde));   //vector density from gradient of n1vTilde
	VectorField n2v = I(gradient(-n3tilde));   //n2v is the negative gradient of n3 in real space
	TensorField n2m = I(tensorKernel(n2mTilde)); //tensor weighted density (see tensorKernel above)

	//Gradient accumulators for the derived densities (the caller-passed grad_n0..2
	//are also allocated/zeroed here if null):
	ScalarField grad_n3; VectorField grad_n1v, grad_n2v; TensorField grad_n2m;
	nullToZero(grad_n0, gInfo); nullToZero(grad_n1, gInfo); nullToZero(grad_n2, gInfo); nullToZero(grad_n3, gInfo);
	nullToZero(grad_n1v, gInfo); nullToZero(grad_n2v, gInfo); nullToZero(grad_n2m, gInfo);

	double result;
	#ifdef GPU_ENABLED
	{	//GPU path: evaluate per-point energy into phiArr, then reduce on device:
		ScalarField phiArr(ScalarFieldData::alloc(gInfo, true));
		phiFMT_gpu(gInfo.nr, phiArr->dataGpu(),
			n0->dataGpu(), n1->dataGpu(), n2->dataGpu(), n3->dataGpu(), n1v.const_dataGpu(), n2v.const_dataGpu(), n2m.const_dataGpu(),
			grad_n0->dataGpu(), grad_n1->dataGpu(), grad_n2->dataGpu(), grad_n3->dataGpu(),
			grad_n1v.dataGpu(), grad_n2v.dataGpu(), grad_n2m.dataGpu());
		result = gInfo.dV * sum(phiArr);
	}
	#else
	//CPU path: threaded per-point accumulation of the energy (gradients written in place):
	result = gInfo.dV*threadedAccumulate(phiFMT_calc, gInfo.nr,
			n0->data(), n1->data(), n2->data(), n3->data(), n1v.const_data(), n2v.const_data(), n2m.const_data(),
			grad_n0->data(), grad_n1->data(), grad_n2->data(), grad_n3->data(),
			grad_n1v.data(), grad_n2v.data(), grad_n2m.data());
	#endif
	n3=0; n1v=0; n2v=0; n2m=0; //no longer need these weighted densities (clean up)

	//Chain rule: propagate gradients of the derived densities back to the
	//reciprocal-space inputs using the adjoints of the forward transforms
	//(Idag for I, tensorKernel_grad for tensorKernel, -divergence for gradient),
	//freeing each accumulator as soon as it has been consumed:
	grad_n2mTilde += tensorKernel_grad(Idag(grad_n2m)); grad_n2m=0;
	grad_n1vTilde -= divergence(Idag(grad_n1v)); grad_n1v=0;
	grad_n3tilde += ( Idag(grad_n3) + divergence(Idag(grad_n2v)) ); grad_n3=0; grad_n2v=0; //sign flip of n2v = grad(-n3) absorbed here
	return result;
}
111 
phiFMTuniform(double n0,double n1,double n2,double n3,double & grad_n0,double & grad_n1,double & grad_n2,double & grad_n3)112 double phiFMTuniform(double n0, double n1, double n2, double n3,
113 	double& grad_n0, double& grad_n1, double& grad_n2, double& grad_n3)
114 {
115 	double zero=0.0, constzero=0.0;
116 	std::vector<double*> zeroArr(5,&zero);
117 	std::vector<const double*> constZeroArr(5,&constzero); //dummy arrays for zero vector and tensor weighted densities
118 
119 	return phiFMT_calc(0, &n0, &n1, &n2, &n3, constZeroArr, constZeroArr, constZeroArr,
120 		&grad_n0, &grad_n1, &grad_n2, &grad_n3, zeroArr, zeroArr, zeroArr);
121 }
122 
123 #ifdef GPU_ENABLED
124 void phiBond_gpu(int N, double Rhm, double scale, double* phiArr,
125 	const double *n0arr, const double *n2arr, const double *n3arr, vector3<const double*> n2vArr,
126 	double *grad_n0arr, double *grad_n2arr, double *grad_n3arr, vector3<double*> grad_n2vArr);
127 #endif
128 
PhiBond(double Rhm,double scale,const ScalarField & n0mol,const ScalarField & n2,const ScalarFieldTilde & n3tilde,ScalarField & grad_n0mol,ScalarField & grad_n2,ScalarFieldTilde & grad_n3tilde)129 double PhiBond(double Rhm, double scale, const ScalarField& n0mol, const ScalarField& n2, const ScalarFieldTilde& n3tilde,
130 	ScalarField& grad_n0mol, ScalarField& grad_n2, ScalarFieldTilde& grad_n3tilde)
131 {
132 	const GridInfo& gInfo = n0mol->gInfo;
133 	//Compute n3 and n2v in real space from n3tilde:
134 	ScalarField n3 = I(n3tilde);
135 	VectorField n2v = I(gradient(-n3tilde));
136 	//Bonding correction and gradient:
137 	ScalarField grad_n3; VectorField grad_n2v;
138 	nullToZero(grad_n0mol, gInfo);
139 	nullToZero(grad_n2, gInfo);
140 	nullToZero(grad_n3, gInfo);
141 	nullToZero(grad_n2v, gInfo);
142 	#ifdef GPU_ENABLED
143 	ScalarField phiArr(ScalarFieldData::alloc(gInfo, true));
144 	phiBond_gpu(gInfo.nr, Rhm, scale, phiArr->dataGpu(),
145 		n0mol->dataGpu(), n2->dataGpu(), n3->dataGpu(), n2v.const_dataGpu(),
146 		grad_n0mol->dataGpu(), grad_n2->dataGpu(), grad_n3->dataGpu(), grad_n2v.dataGpu());
147 	double result = gInfo.dV * sum(phiArr);
148 	#else
149 	double result = gInfo.dV * threadedAccumulate(phiBond_calc, gInfo.nr, Rhm, scale,
150 			n0mol->data(), n2->data(), n3->data(), n2v.const_data(),
151 			grad_n0mol->data(), grad_n2->data(), grad_n3->data(), grad_n2v.data());
152 	#endif
153 	n3=0; n2v=0; //no longer need these weighted densities (clean up)
154 	//Propagate grad_n2v and grad_n3 to grad_n3tilde:
155 	grad_n3tilde += ( Idag(grad_n3) + divergence(Idag(grad_n2v)) );
156 	return result;
157 }
158 
phiBondUniform(double Rhm,double scale,double n0mol,double n2,double n3,double & grad_n0mol,double & grad_n2,double & grad_n3)159 double phiBondUniform(double Rhm, double scale, double n0mol, double n2, double n3,
160 	double& grad_n0mol, double& grad_n2, double& grad_n3)
161 {	double zero=0.0, constzero=0.0;
162 	std::vector<double*> zeroArr(3,&zero);
163 	std::vector<const double*> constZeroArr(3,&constzero); //dummy arrays for zero vector weighted densities
164 	return phiBond_calc(0, Rhm, scale, &n0mol, &n2, &n3, constZeroArr, &grad_n0mol, &grad_n2, &grad_n3, zeroArr);
165 }
166