/*-------------------------------------------------------------------
Copyright 2011 Ravishankar Sundararaman

This file is part of JDFTx.

JDFTx is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

JDFTx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with JDFTx.  If not, see <http://www.gnu.org/licenses/>.
-------------------------------------------------------------------*/

#include <core/Operators.h>
#include <core/Operators_internal.h>
#include <core/GridInfo.h>
#include <core/VectorField.h>
#include <core/ScalarFieldArray.h>
#include <core/Random.h>
#include <string.h>

//------------------------------ Conversion operators ------------------------------


ScalarField Real(const complexScalarField& C)
{	const GridInfo& gInfo = C->gInfo;
	ScalarField R; nullToZero(R, gInfo);
	callPref(eblas_daxpy)(gInfo.nr, C->scale, (const double*)C->dataPref(false), 2, R->dataPref(false), 1);
	R->scale = 1;
	return R;
}

void RealG_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac)
{	THREAD_halfGspaceLoop( RealG_calc(i, iG, S, vFull, vHalf, scaleFac); )
}
#ifdef GPU_ENABLED
void RealG_gpu(const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac);
#endif
ScalarFieldTilde Real(const complexScalarFieldTilde& full)
{	const GridInfo& gInfo = full->gInfo;
	ScalarFieldTilde half = ScalarFieldTildeData::alloc(gInfo, isGpuEnabled());
	#ifdef GPU_ENABLED
	RealG_gpu(gInfo.S, full->dataGpu(false), half->dataGpu(false), full->scale);
	#else
	threadLaunch(RealG_sub, gInfo.nG, gInfo.S, full->data(false), half->data(false), full->scale);
	#endif
	half->scale = 1;
	return half;
}

ScalarField Imag(const complexScalarField& C)
{	const GridInfo& gInfo = C->gInfo;
	ScalarField I; nullToZero(I, gInfo);
	callPref(eblas_daxpy)(gInfo.nr, C->scale, ((const double*)C->dataPref(false))+1, 2, I->dataPref(false), 1);
	I->scale = 1;
	return I;
}

void ImagG_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac)
{	THREAD_halfGspaceLoop( ImagG_calc(i, iG, S, vFull, vHalf, scaleFac); )
}
#ifdef GPU_ENABLED
void ImagG_gpu(const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac);
#endif
ScalarFieldTilde Imag(const complexScalarFieldTilde& full)
{	const GridInfo& gInfo = full->gInfo;
	ScalarFieldTilde half = ScalarFieldTildeData::alloc(gInfo, isGpuEnabled());
	#ifdef GPU_ENABLED
	ImagG_gpu(gInfo.S, full->dataGpu(false), half->dataGpu(false), full->scale);
	#else
	threadLaunch(ImagG_sub, gInfo.nG, gInfo.S, full->data(false), half->data(false), full->scale);
	#endif
	half->scale = 1;
	return half;
}

complexScalarField Complex(const ScalarField& R)
{	const GridInfo& gInfo = R->gInfo;
	complexScalarField C; nullToZero(C, gInfo);
	callPref(eblas_daxpy)(gInfo.nr, R->scale, R->dataPref(false), 1, (double*)C->dataPref(false), 2);
	C->scale = 1;
	return C;
}

complexScalarField Complex(const ScalarField& re, const ScalarField& im)
{	const GridInfo& gInfo = re->gInfo;
	complexScalarField C; nullToZero(C, gInfo);
	callPref(eblas_daxpy)(gInfo.nr, re->scale, re->dataPref(false), 1, (double*)(C->dataPref(false))+0, 2);
	callPref(eblas_daxpy)(gInfo.nr, im->scale, im->dataPref(false), 1, (double*)(C->dataPref(false))+1, 2);
	C->scale = 1;
	return C;
}

void ComplexG_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* vHalf, complex* vFull, double scaleFac)
{	THREAD_halfGspaceLoop( ComplexG_calc(i, iG, S, vHalf, vFull, scaleFac); )
}
#ifdef GPU_ENABLED
void ComplexG_gpu(const vector3<int> S, const complex* vHalf, complex* vFull, double scaleFac);
#endif
complexScalarFieldTilde Complex(const ScalarFieldTilde& half)
{	const GridInfo& gInfo = half->gInfo;
	complexScalarFieldTilde full = complexScalarFieldTildeData::alloc(gInfo, isGpuEnabled());
	#ifdef GPU_ENABLED
	ComplexG_gpu(gInfo.S, half->dataGpu(false), full->dataGpu(false), half->scale);
	#else
	threadLaunch(ComplexG_sub, gInfo.nG, gInfo.S, half->data(false), full->data(false), half->scale);
	#endif
	full->scale = 1;
	return full;
}
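
//Usage sketch (illustrative; C is a hypothetical complexScalarField): split a complex
//field into its real and imaginary parts and reassemble it:
//	ScalarField re = Real(C), im = Imag(C);
//	complexScalarField C2 = Complex(re, im); //C2 matches C elementwise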


//------------------------------ Linear Unary operators ------------------------------

ScalarFieldTilde O(const ScalarFieldTilde& in) { return in * in->gInfo.detR; }
ScalarFieldTilde O(ScalarFieldTilde&& in) { return in *= in->gInfo.detR; }
complexScalarFieldTilde O(const complexScalarFieldTilde& in) { return in * in->gInfo.detR; }
complexScalarFieldTilde O(complexScalarFieldTilde&& in) { return in *= in->gInfo.detR; }
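
//Note: O is the overlap operator of the plane-wave basis, which reduces to
//multiplication by the unit cell volume detR; e.g. dot(X, O(Y)) is then the overlap
//integral of the corresponding real-space fields over the unit cell.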


//Forward transform
ScalarField I(ScalarFieldTilde&& in, int nThreads)
{	//c2r transforms may destroy input, but this input can be destroyed
	ScalarField out(ScalarFieldData::alloc(in->gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	cufftExecZ2D(in->gInfo.planZ2D, (double2*)in->dataGpu(false), out->dataGpu(false));
	#else
	if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
	fftw_execute_dft_c2r(in->gInfo.getPlan(GridInfo::PlanCtoR, nThreads),
		(fftw_complex*)in->data(false), out->data(false));
	#endif
	out->scale = in->scale;
	return out;
}
ScalarField I(const ScalarFieldTilde& in, int nThreads)
{	//c2r transforms may destroy input, hence copy input (and let that copy be destroyed above)
	return I(in->clone(), nThreads);
}
complexScalarField I(const complexScalarFieldTilde& in, int nThreads)
{	complexScalarField out(complexScalarFieldData::alloc(in->gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)out->dataGpu(false), CUFFT_INVERSE);
	#else
	if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
	fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanInverse, nThreads),
		(fftw_complex*)in->data(false), (fftw_complex*)out->data(false));
	#endif
	out->scale = in->scale;
	return out;
}
complexScalarField I(complexScalarFieldTilde&& in, int nThreads)
{	//Destructible input (transform in place):
	#ifdef GPU_ENABLED
	cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)in->dataGpu(false), CUFFT_INVERSE);
	#else
	if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
	fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanInverseInPlace, nThreads),
		(fftw_complex*)in->data(false), (fftw_complex*)in->data(false));
	#endif
	return std::static_pointer_cast<complexScalarFieldData>(std::static_pointer_cast<FieldData<complex>>(in));
}

//Forward transform h.c.
ScalarFieldTilde Idag(const ScalarField& in, int nThreads)
{	//r2c transform does not destroy input (no backing up needed)
	ScalarFieldTilde out(ScalarFieldTildeData::alloc(in->gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	cufftExecD2Z(in->gInfo.planD2Z, in->dataGpu(false), (double2*)out->dataGpu(false));
	#else
	if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
	fftw_execute_dft_r2c(in->gInfo.getPlan(GridInfo::PlanRtoC, nThreads),
		in->data(false), (fftw_complex*)out->data(false));
	#endif
	out->scale = in->scale;
	return out;
}
complexScalarFieldTilde Idag(const complexScalarField& in, int nThreads)
{	complexScalarFieldTilde out(complexScalarFieldTildeData::alloc(in->gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)out->dataGpu(false), CUFFT_FORWARD);
	#else
	if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
	fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanForward, nThreads),
		(fftw_complex*)in->data(false), (fftw_complex*)out->data(false));
	#endif
	out->scale = in->scale;
	return out;
}
complexScalarFieldTilde Idag(complexScalarField&& in, int nThreads)
{	//Destructible input (transform in place):
	#ifdef GPU_ENABLED
	cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)in->dataGpu(false), CUFFT_FORWARD);
	#else
	if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
	fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanForwardInPlace, nThreads),
		(fftw_complex*)in->data(false), (fftw_complex*)in->data(false));
	#endif
	return std::static_pointer_cast<complexScalarFieldTildeData>(std::static_pointer_cast<FieldData<complex>>(in));
}

//Reverse transform (same as Idag up to the normalization factor)
ScalarFieldTilde J(const ScalarField& in, int nThreads) { return (1.0/in->gInfo.nr)*Idag(in, nThreads); }
complexScalarFieldTilde J(const complexScalarField& in, int nThreads) { return (1.0/in->gInfo.nr)*Idag(in, nThreads); }
complexScalarFieldTilde J(complexScalarField&& in, int nThreads) { return Idag((complexScalarField&&)(in *= 1.0/in->gInfo.nr), nThreads); }

//Reverse transform h.c. (same as I up to the normalization factor)
ScalarField Jdag(const ScalarFieldTilde& in, int nThreads) { return (1.0/in->gInfo.nr)*I(in, nThreads); }
ScalarField Jdag(ScalarFieldTilde&& in, int nThreads) { return (1.0/in->gInfo.nr)*I(in, nThreads); }
complexScalarField Jdag(const complexScalarFieldTilde& in, int nThreads) { return (1.0/in->gInfo.nr)*I(in, nThreads); }
complexScalarField Jdag(complexScalarFieldTilde&& in, int nThreads) { return I((complexScalarFieldTilde&&)(in *= 1.0/in->gInfo.nr), nThreads); }

ScalarField JdagOJ(const ScalarField& in) { return in * in->gInfo.dV; }
ScalarField JdagOJ(ScalarField&& in) { return in *= in->gInfo.dV; }
complexScalarField JdagOJ(const complexScalarField& in) { return in * in->gInfo.dV; }
complexScalarField JdagOJ(complexScalarField&& in) { return in *= in->gInfo.dV; }
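
//Round-trip sketch (illustrative; n is a hypothetical ScalarField):
//	ScalarFieldTilde nTilde = J(n); //real -> reciprocal space (normalized by 1/nr)
//	ScalarField nBack = I(nTilde);  //reciprocal -> real space; recovers n
//	double Q = integral(n);         //equals detR times the G=0 component of nTilde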



inline void L_sub(int i, double Gsq, complex* v)
{	v[i] *= Gsq;
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void L_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
#endif
ScalarFieldTilde L(ScalarFieldTilde&& in)
{	const GridInfo& gInfo = in->gInfo;
	in *= -gInfo.detR;
	#ifdef GPU_ENABLED
	L_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
	#else
	applyFuncGsq(gInfo, L_sub, in->data(false));
	#endif
	return in;
}
ScalarFieldTilde L(const ScalarFieldTilde& in) { return L(in->clone()); }

inline void Linv_sub(int i, double Gsq, complex* v)
{	if(i==0) v[i]=0.0;
	else v[i] /= Gsq;
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void Linv_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
#endif
ScalarFieldTilde Linv(ScalarFieldTilde&& in)
{	const GridInfo& gInfo = in->gInfo;
	in *= (-1.0/gInfo.detR);
	#ifdef GPU_ENABLED
	Linv_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
	#else
	applyFuncGsq(gInfo, Linv_sub, in->data(false));
	#endif
	return in;
}
ScalarFieldTilde Linv(const ScalarFieldTilde& in) { return Linv(in->clone()); }
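
//Usage sketch (illustrative; rhoTilde is a hypothetical charge density): since L
//combines O with the Laplacian in this basis, a periodic Poisson equation can be
//solved, with the G=0 component projected out by Linv, as:
//	ScalarFieldTilde phiTilde = (-4*M_PI) * Linv(O(rhoTilde));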


void fullL_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<> GGT, complex* v)
{	THREAD_fullGspaceLoop( v[i] *= GGT.metric_length_squared(iG); )
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void fullL_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
#endif
complexScalarFieldTilde L(complexScalarFieldTilde&& in)
{	const GridInfo& gInfo = in->gInfo;
	in *= -gInfo.detR;
	#ifdef GPU_ENABLED
	fullL_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
	#else
	threadLaunch(fullL_sub, gInfo.nr, gInfo.S, gInfo.GGT, in->data(false));
	#endif
	return in;
}
complexScalarFieldTilde L(const complexScalarFieldTilde& in) { return L(in->clone()); }

void fullLinv_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<> GGT, complex* v)
{	THREAD_fullGspaceLoop( v[i] *= i ? (1.0/GGT.metric_length_squared(iG)) : 0.0; )
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void fullLinv_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
#endif
complexScalarFieldTilde Linv(complexScalarFieldTilde&& in)
{	const GridInfo& gInfo = in->gInfo;
	in *= (-1.0/gInfo.detR);
	#ifdef GPU_ENABLED
	fullLinv_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
	#else
	threadLaunch(fullLinv_sub, gInfo.nr, gInfo.S, gInfo.GGT, in->data(false));
	#endif
	return in;
}
complexScalarFieldTilde Linv(const complexScalarFieldTilde& in) { return Linv(in->clone()); }


void Lstress_thread(size_t iStart, size_t iStop, const vector3<int> S, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	THREAD_halfGspaceLoop
	(	double weight = ((iG[2]==0) or (2*iG[2]==S[2])) ? 1 : 2; //weight factor for points in reduced reciprocal space of real scalar fields
		grad_RRT[i] = (weight * real(X[i].conj() * Y[i])) * outer(vector3<>(iG));
	)
}
inline void Lstress(const vector3<int> S, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	threadLaunch(Lstress_thread, S[0]*S[1]*(S[2]/2+1), S, X, Y, grad_RRT);
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void Lstress_gpu(const vector3<int> S, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT);
#endif
matrix3<> Lstress(const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
{	const GridInfo& gInfo = X->gInfo;
	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
	callPref(Lstress)(gInfo.S, X->dataPref(), Y->dataPref(), result.dataPref());
	matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
	return (2.*gInfo.detR) * (gInfo.GT * resultSum * gInfo.G);
}

void LinvStress_thread(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	THREAD_halfGspaceLoop
	(	double weight = ((iG[2]==0) or (2*iG[2]==S[2])) ? 1 : 2; //weight factor for points in reduced reciprocal space of real scalar fields
		double Gsq = GGT.metric_length_squared(iG);
		double GsqInv = Gsq ? 1./Gsq : 0;
		grad_RRT[i] = (weight * real(X[i].conj() * Y[i]) * (-GsqInv*GsqInv)) * outer(vector3<>(iG));
	)
}
inline void LinvStress(const vector3<int> S, const matrix3<>& GGT, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	threadLaunch(LinvStress_thread, S[0]*S[1]*(S[2]/2+1), S, GGT, X, Y, grad_RRT);
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void LinvStress_gpu(const vector3<int> S, const matrix3<>& GGT, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT);
#endif
matrix3<> LinvStress(const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
{	const GridInfo& gInfo = X->gInfo;
	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
	callPref(LinvStress)(gInfo.S, gInfo.GGT, X->dataPref(), Y->dataPref(), result.dataPref());
	matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
	return (2.*gInfo.detR) * (gInfo.GT * resultSum * gInfo.G);
}
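
//Note: Lstress(X,Y) and LinvStress(X,Y) return the lattice derivatives of dot(X,L(Y))
//and dot(X,Linv(Y)) in the grad_RRT convention (gradient w.r.t. lattice vectors R,
//contracted with R^T), for accumulation into the total stress.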


template<typename Scalar> void zeroNyquist_sub(size_t iStart, size_t iStop, const vector3<int> S, Scalar* data)
{	THREAD_halfGspaceLoop( if(IS_NYQUIST) data[i] = Scalar(0.0); )
}
void zeroNyquist(RealKernel& K)
{	threadLaunch(zeroNyquist_sub<double>, K.gInfo.nG, K.gInfo.S, K.data());
}
void zeroNyquist(ScalarFieldTilde& Gptr)
{	const GridInfo& gInfo = Gptr->gInfo;
	threadLaunch(zeroNyquist_sub<complex>, gInfo.nG, gInfo.S, Gptr->data());
}
void zeroNyquist(ScalarField& Rptr) { ScalarFieldTilde Rtilde=J(Rptr); zeroNyquist(Rtilde); Rptr = I((ScalarFieldTilde&&)Rtilde); }

void removePhase(size_t N, complex* data, double& meanPhase, double& sigmaPhase, double& rmsImagErr)
{	//Find mean phase
	double w0=0.0, r1=0.0, r2=0.0, i1=0.0, i2=0.0; //moments of normalized real and imaginary parts
	for(size_t i=0; i<N; i++)
	{	complex c = data[i]*data[i];
		double w = abs(c); //weight
		if(w > 1e-300)
		{	w0 += w;
			r1 += c.real();
			i1 += c.imag();
			r2 += c.real() * c.real() / w;
			i2 += c.imag() * c.imag() / w;
		}
	}
	double rMean=r1/w0, rSigma=sqrt(std::max(0.,r2/w0-pow(rMean,2)));
	double iMean=i1/w0, iSigma=sqrt(std::max(0.,i2/w0-pow(iMean,2)));
	meanPhase = 0.5*atan2(iMean, rMean);
	sigmaPhase = 0.5*hypot(iMean*rSigma, rMean*iSigma)/(pow(rMean,2) + pow(iMean,2));

	//Remove phase:
	complex phaseCompensate = cis(-meanPhase);
	double imSqSum=0.0, reSqSum=0.0;
	for(size_t i=0; i<N; i++)
	{	complex& c = data[i];
		c *= phaseCompensate;
		reSqSum += pow(c.real(),2);
		imSqSum += pow(c.imag(),2);
		c = c.real();
	}
	rmsImagErr = sqrt(imSqSum/(imSqSum+reSqSum));
}
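
//Note: the mean phase is estimated from data[i]^2, which doubles the phase so that +v
//and -v contribute alike; hence the half-angle in atan2 above. Usage sketch
//(illustrative; dataPtr and nElem are hypothetical):
//	double meanPhase, sigmaPhase, rmsImagErr;
//	removePhase(nElem, dataPtr, meanPhase, sigmaPhase, rmsImagErr);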


//------------------------------ Spatial gradients of scalar fields ---------------------------------

//first cartesian derivative
void D_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* in, complex* out, vector3<> Ge)
{	THREAD_halfGspaceLoop( D_calc(i, iG, in, out, Ge); )
}
#ifdef GPU_ENABLED
void D_gpu(const vector3<int> S, const complex* in, complex* out, vector3<> Ge);
#endif
ScalarFieldTilde D(const ScalarFieldTilde& in, int iDir)
{	const GridInfo& gInfo = in->gInfo;
	ScalarFieldTilde out(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	D_gpu(gInfo.S, in->dataGpu(), out->dataGpu(), gInfo.G.column(iDir));
	#else
	threadLaunch(D_sub, gInfo.nG, gInfo.S, in->data(), out->data(), gInfo.G.column(iDir));
	#endif
	return out;
}

//second cartesian derivative
void DD_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* in, complex* out, vector3<> Ge1, vector3<> Ge2)
{	THREAD_halfGspaceLoop( DD_calc(i, iG, in, out, Ge1, Ge2); )
}
#ifdef GPU_ENABLED
void DD_gpu(const vector3<int> S, const complex* in, complex* out, vector3<> g, vector3<> q);
#endif
ScalarFieldTilde DD(const ScalarFieldTilde& in, int iDir, int jDir)
{	const GridInfo& gInfo = in->gInfo;
	ScalarFieldTilde out(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	DD_gpu(gInfo.S, in->dataGpu(), out->dataGpu(), gInfo.G.column(iDir), gInfo.G.column(jDir));
	#else
	threadLaunch(DD_sub, gInfo.nG, gInfo.S, in->data(), out->data(), gInfo.G.column(iDir), gInfo.G.column(jDir));
	#endif
	return out;
}
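
//Usage sketch (illustrative; phiTilde is a hypothetical potential): Cartesian
//derivatives applied in reciprocal space, e.g. the z-derivative:
//	ScalarField dPhidz = I(D(phiTilde, 2)); //applies i*G_z to each Fourier component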


template<int l> void lGradient_sub(size_t iStart, size_t iStop, const vector3<int>& S, const complex* in, const array<complex*, 2*l+1>& out, const matrix3<>& G)
{	THREAD_halfGspaceLoop( lGradient_calc<l>(i, iG, IS_NYQUIST, in, out, G); )
}
template<int l> void lGradient(const vector3<int>& S, const complex* in, array<complex*, 2*l+1> out, const matrix3<>& G)
{	threadLaunch(lGradient_sub<l>, S[0]*S[1]*(S[2]/2+1), S, in, out, G);
}
void lGradient(const vector3<int>& S, const complex* in, std::vector<complex*> out, int l, const matrix3<>& G)
{	SwitchTemplate_l(l, lGradient, (S, in, out, G))
}
#ifdef GPU_ENABLED
void lGradient_gpu(const vector3<int>& S, const complex* in, std::vector<complex*> out, int l, const matrix3<>& G);
#endif

ScalarFieldTildeArray lGradient(const ScalarFieldTilde& in, int l)
{	ScalarFieldTildeArray out; nullToZero(out, in->gInfo, 2*l+1);
	callPref(lGradient)(in->gInfo.S, in->dataPref(), dataPref(out), l, in->gInfo.G);
	return out;
}

template<int l> void lDivergence_sub(size_t iStart, size_t iStop, const vector3<int>& S, const array<const complex*,2*l+1>& in, complex* out, const matrix3<>& G)
{	THREAD_halfGspaceLoop( lDivergence_calc<l>(i, iG, IS_NYQUIST, in, out, G); )
}
template<int l> void lDivergence(const vector3<int>& S, array<const complex*,2*l+1> in, complex* out, const matrix3<>& G)
{	threadLaunch(lDivergence_sub<l>, S[0]*S[1]*(S[2]/2+1), S, in, out, G);
}
void lDivergence(const vector3<int>& S, const std::vector<const complex*>& in, complex* out, int l, const matrix3<>& G)
{	SwitchTemplate_l(l, lDivergence, (S, in, out, G))
}
#ifdef GPU_ENABLED
void lDivergence_gpu(const vector3<int>& S, const std::vector<const complex*>& in, complex* out, int l, const matrix3<>& G);
#endif

ScalarFieldTilde lDivergence(const ScalarFieldTildeArray& in, int l)
{	assert(int(in.size()) == 2*l+1);
	ScalarFieldTilde out; nullToZero(out, in[0]->gInfo);
	callPref(lDivergence)(in[0]->gInfo.S, constDataPref(in), out->dataPref(), l, in[0]->gInfo.G);
	return out;
}
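
//Note: lGradient(in, l) produces the 2l+1 spherical-tensor components of order l of
//the gradient of in (reducing, up to normalization, to the Cartesian gradient for
//l=1), and lDivergence is the corresponding hermitian-conjugate contraction.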


template<int l, int m> void lGradientStress_sub(size_t iStart, size_t iStop, const vector3<int>& S, const matrix3<>& G,
		const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{
	const complex lPhase = cis(l*0.5*M_PI);
	THREAD_halfGspaceLoop(
		double weight = real(conj(X[i]) * Y[i] * lPhase) * (IS_NYQUIST
			? 0 //drop nyquist frequency contributions
			: (iG[2]==0 ? 1 : 2) ); //reciprocal space weights
		grad_RRT[i] = weight * lGradientStress_calc<l*(l+1)+m>(iG, G, w);
	)
}
template<int l, int m> void lGradientStress(const vector3<int>& S, const matrix3<>& G, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	threadLaunch(lGradientStress_sub<l,m>, S[0]*S[1]*(S[2]/2+1), S, G, w, X, Y, grad_RRT);
}
void lGradientStress(const vector3<int>& S, const matrix3<>& G, const RadialFunctionG& w, const complex* X, const complex* Y, int l, int m, symmetricMatrix3<>* grad_RRT)
{	SwitchTemplate_lm(l, m, lGradientStress, (S, G, w, X, Y, grad_RRT))
}
#ifdef GPU_ENABLED
void lGradientStress_gpu(const vector3<int>& S, const matrix3<>& G, const RadialFunctionG& w, const complex* X, const complex* Y, int l, int m, symmetricMatrix3<>* grad_RRT);
#endif
matrix3<> lGradientStress(const RadialFunctionG& w, const ScalarFieldTilde& X, const ScalarFieldTilde& Y, int l, int m)
{	const GridInfo& gInfo = X->gInfo;
	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
	callPref(lGradientStress)(gInfo.S, gInfo.G, w, X->dataPref(), Y->dataPref(), l, m, result.dataPref());
	matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
	return (-gInfo.detR) * resultSum;
}


void multiplyBlochPhase_sub(size_t iStart, size_t iStop,
	const vector3<int>& S, const vector3<>& invS, complex* v, const vector3<>& k)
{	THREAD_rLoop( v[i] *= blochPhase_calc(iv, invS, k); )
}
#ifdef GPU_ENABLED
void multiplyBlochPhase_gpu(const vector3<int>& S, const vector3<>& invS, complex* v, const vector3<>& k);
#endif
void multiplyBlochPhase(complexScalarField& v, const vector3<>& k)
{	const GridInfo& gInfo = v->gInfo;
	vector3<> invS(1./gInfo.S[0], 1./gInfo.S[1], 1./gInfo.S[2]);
	#ifdef GPU_ENABLED
	multiplyBlochPhase_gpu(gInfo.S, invS, v->dataGpu(), k);
	#else
	threadLaunch(multiplyBlochPhase_sub, gInfo.nr, gInfo.S, invS, v->data(), k);
	#endif
}
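
//Usage sketch (illustrative; u and k are hypothetical): convert the periodic part u
//of a Bloch function to the full Bloch wave exp(i k.r) u(r) on the grid, with k in
//reciprocal lattice coordinates:
//	multiplyBlochPhase(u, k);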

//------ RadialFunction related operators ------

void radialFunction_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT,
	complex* F, const RadialFunctionG& f, vector3<> r0)
{	THREAD_halfGspaceLoop( F[i] = radialFunction_calc(iG, GGT, f, r0); )
}
#ifdef GPU_ENABLED
void radialFunction_gpu(const vector3<int> S, const matrix3<>& GGT,
	complex* F, const RadialFunctionG& f, vector3<> r0);
#endif
ScalarFieldTilde radialFunctionG(const GridInfo& gInfo, const RadialFunctionG& f, vector3<> r0)
{
	ScalarFieldTilde F(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	radialFunction_gpu(gInfo.S, gInfo.GGT, F->dataGpu(), f, r0);
	#else
	threadLaunch(radialFunction_sub, gInfo.nG, gInfo.S, gInfo.GGT, F->data(), f, r0);
	#endif
	return F;
}

ScalarField radialFunction(const GridInfo& gInfo, const RadialFunctionG& f, vector3<> r0)
{
	ScalarFieldTilde F = radialFunctionG(gInfo, f, r0);
	return (1.0/gInfo.detR) * I(F);
}

void radialFunctionG(const RadialFunctionG& f, RealKernel& Kernel)
{
	ScalarFieldTilde F = radialFunctionG(Kernel.gInfo, f, vector3<>(0,0,0));
	const complex* FData = F->data(); //put F into Kernel
	double* KernelData = Kernel.data();
	for(int i=0; i<Kernel.gInfo.nG; i++)
		KernelData[i] = FData[i].real();
}
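
//Note: radialFunctionG samples f(|G|) on the reciprocal-space grid with a translation
//phase that centers the profile at r0; radialFunction returns the corresponding
//real-space field, with the 1/detR accounting for the Fourier normalization.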


void radialFunctionMultiply_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT,
	complex* in, const RadialFunctionG& f)
{	THREAD_halfGspaceLoop( in[i] *= f(sqrt(GGT.metric_length_squared(iG))); )
}
#ifdef GPU_ENABLED
void radialFunctionMultiply_gpu(const vector3<int> S, const matrix3<>& GGT, complex* in, const RadialFunctionG& f);
#endif

ScalarFieldTilde operator*(const RadialFunctionG& f, ScalarFieldTilde&& in)
{	const GridInfo& gInfo = in->gInfo;
	#ifdef GPU_ENABLED
	radialFunctionMultiply_gpu(gInfo.S, gInfo.GGT, in->dataGpu(), f);
	#else
	threadLaunch(radialFunctionMultiply_sub, gInfo.nG, gInfo.S, gInfo.GGT, in->data(), f);
	#endif
	return in;
}

ScalarFieldTilde operator*(const RadialFunctionG& f, const ScalarFieldTilde& in)
{	ScalarFieldTilde out(in->clone()); //destructible copy
	return f * ((ScalarFieldTilde&&)out);
}

VectorFieldTilde operator*(const RadialFunctionG& f, VectorFieldTilde&& in)
{	for(int k=0; k<3; k++) in[k] = f * (ScalarFieldTilde&&)in[k];
	return in;
}

VectorFieldTilde operator*(const RadialFunctionG& f, const VectorFieldTilde& in)
{	VectorFieldTilde out;
	for(int k=0; k<3; k++) out[k] = f * in[k];
	return out;
}
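
//Usage sketch (illustrative; w and nTilde are hypothetical): multiplying by a radial
//kernel w(|G|) in reciprocal space is a spherical convolution in real space, e.g.
//smoothing a density by a weight function:
//	ScalarFieldTilde nBarTilde = w * nTilde;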

//Stress due to dot(X,O(w*Y))
void convolveStress_thread(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	THREAD_halfGspaceLoop
	(	double weight = ((iG[2]==0) or (2*iG[2]==S[2])) ? 1 : 2; //weight factor for points in reduced reciprocal space of real scalar fields
		double G = sqrt(GGT.metric_length_squared(iG));
		double minus_wPrime_by_G = G ? (-w.deriv(G)/G) : 0.;
		grad_RRT[i] = (weight * minus_wPrime_by_G * real(X[i].conj() * Y[i])) * outer(vector3<>(iG));
	)
}
inline void convolveStress(const vector3<int> S, const matrix3<>& GGT, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
{	threadLaunch(convolveStress_thread, S[0]*S[1]*(S[2]/2+1), S, GGT, w, X, Y, grad_RRT);
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void convolveStress_gpu(const vector3<int> S, const matrix3<>& GGT, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT);
#endif
matrix3<> convolveStress(const RadialFunctionG& w, const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
{	const GridInfo& gInfo = X->gInfo;
	ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
	callPref(convolveStress)(gInfo.S, gInfo.GGT, w, X->dataPref(), Y->dataPref(), result.dataPref());
	matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
	return gInfo.detR * (gInfo.GT * resultSum * gInfo.G);
}

//------------------------------ Nonlinear Unary operators ------------------------------

void exp_sub(size_t i, double* X, double prefac) { X[i] = exp(prefac*X[i]); }
#ifdef GPU_ENABLED
void exp_gpu(int N, double* X, double prefac); //in operators.cu
#endif
ScalarField exp(ScalarField&& X)
{
	#ifdef GPU_ENABLED
	exp_gpu(X->nElem, X->dataGpu(false), X->scale);
	#else
	threadedLoop(exp_sub, X->nElem, X->data(false), X->scale);
	#endif
	X->scale = 1.0;
	return X;
}
ScalarField exp(const ScalarField& X) { return exp(X->clone()); }

void log_sub(size_t i, double* X, double prefac) { X[i] = log(prefac*X[i]); }
#ifdef GPU_ENABLED
void log_gpu(int N, double* X, double prefac); //in operators.cu
#endif
ScalarField log(ScalarField&& X)
{
	#ifdef GPU_ENABLED
	log_gpu(X->nElem, X->dataGpu(false), X->scale);
	#else
	threadedLoop(log_sub, X->nElem, X->data(false), X->scale);
	#endif
	X->scale = 1.0;
	return X;
}
ScalarField log(const ScalarField& X) { return log(X->clone()); }


void sqrt_sub(size_t i, double* X, double prefac) { X[i] = sqrt(prefac*X[i]); }
#ifdef GPU_ENABLED
void sqrt_gpu(int N, double* X, double prefac); //in operators.cu
#endif
ScalarField sqrt(ScalarField&& X)
{
	#ifdef GPU_ENABLED
	sqrt_gpu(X->nElem, X->dataGpu(false), X->scale);
	#else
	threadedLoop(sqrt_sub, X->nElem, X->data(false), X->scale);
	#endif
	X->scale = 1.0;
	return X;
}
ScalarField sqrt(const ScalarField& X) { return sqrt(X->clone()); }


void inv_sub(size_t i, double* X, double prefac) { X[i] = prefac/X[i]; }
#ifdef GPU_ENABLED
void inv_gpu(int N, double* X, double prefac); //in operators.cu
#endif
ScalarField inv(ScalarField&& X)
{
	#ifdef GPU_ENABLED
	inv_gpu(X->nElem, X->dataGpu(false), 1.0/X->scale);
	#else
	threadedLoop(inv_sub, X->nElem, X->data(false), 1.0/X->scale);
	#endif
	X->scale = 1.0;
	return X;
}
ScalarField inv(const ScalarField& X) { return inv(X->clone()); }

void pow_sub(size_t i, double* X, double scale, double alpha) { X[i] = pow(scale*X[i],alpha); }
#ifdef GPU_ENABLED
void pow_gpu(int N, double* X, double scale, double alpha); //in operators.cu
#endif
ScalarField pow(ScalarField&& X, double alpha)
{
	#ifdef GPU_ENABLED
	pow_gpu(X->nElem, X->dataGpu(false), X->scale, alpha);
	#else
	threadedLoop(pow_sub, X->nElem, X->data(false), X->scale, alpha);
	#endif
	X->scale = 1.0;
	return X;
}
ScalarField pow(const ScalarField& X, double alpha) { return pow(X->clone(), alpha); }
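
//Usage sketch (illustrative; n is a hypothetical electron density): these act
//elementwise and absorb any pending scale factor, e.g. the Wigner-Seitz radius field:
//	ScalarField rs = pow((4.*M_PI/3.)*n, -1./3.);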


//------------------------------ Multiplication operators ------------------------------

ScalarField& operator*=(ScalarField& in, const ScalarField& other)
{	in->scale *= other->scale;
	callPref(eblas_dmul)(in->nElem, other->dataPref(false), 1, in->dataPref(false), 1);
	return in;
}

complexScalarField& operator*=(complexScalarField& in, const ScalarField& other)
{	in->scale *= other->scale;
	callPref(eblas_zmuld)(in->nElem, other->dataPref(false), 1, in->dataPref(false), 1);
	return in;
}
complexScalarField operator*(const complexScalarField& inC, const ScalarField& inR) { complexScalarField out(inC->clone()); return out *= inR; }
complexScalarField operator*(const ScalarField& inR, const complexScalarField& inC) { complexScalarField out(inC->clone()); return out *= inR; }
complexScalarField operator*(complexScalarField&& inC, const ScalarField& inR) { return inC *= inR; }
complexScalarField operator*(const ScalarField& inR, complexScalarField&& inC) { return inC *= inR; }

ScalarFieldTilde& operator*=(ScalarFieldTilde& inG, const RealKernel& inR)
{	callPref(eblas_zmuld)(inG->nElem, inR.dataPref(), 1, inG->dataPref(false), 1);
	return inG;
}
ScalarFieldTilde operator*(const RealKernel& inR, const ScalarFieldTilde& inG) { ScalarFieldTilde out(inG->clone()); return out *= inR; }
ScalarFieldTilde operator*(const ScalarFieldTilde& inG, const RealKernel& inR) { ScalarFieldTilde out(inG->clone()); return out *= inR; }
ScalarFieldTilde operator*(const RealKernel& inR, ScalarFieldTilde&& inG) { return inG *= inR; }
ScalarFieldTilde operator*(ScalarFieldTilde&& inG, const RealKernel& inR) { return inG *= inR; }


//------------------------------ Linear combine operators ------------------------------

void axpy(double alpha, const ScalarField& X, ScalarField& Y)
{	if(X)
	{	if(Y)
		{	if(Y->scale == 0.0) { Y = X * alpha; }
			else callPref(eblas_daxpy)(X->nElem, alpha*X->scale/Y->scale, X->dataPref(false), 1, Y->dataPref(false), 1);
		}
		else Y = X * alpha;
	}
	//if X is null, nothing needs to be done, Y remains unchanged
}
ScalarField& operator+=(ScalarField& in, double scalar)
{	ManagedArray<double> dataScalar(&scalar, 1); //Managed array holding scalar
	callPref(eblas_daxpy)(in->nElem, 1.0, dataScalar.dataPref(), 0, in->dataPref(), 1);
	return in;
}
ScalarField operator+(double scalar, const ScalarField& in) { ScalarField out(in->clone()); return out += scalar; }
ScalarField operator+(const ScalarField& in, double scalar) { ScalarField out(in->clone()); return out += scalar; }
ScalarField operator+(double scalar, ScalarField&& in) { return in += scalar; }
ScalarField operator+(ScalarField&& in, double scalar) { return in += scalar; }
ScalarField& operator-=(ScalarField& in, double scalar)
{	return (in += -scalar);
}
ScalarField operator-(double scalar, const ScalarField& in) { ScalarField out(in->clone()); return (out *= -1.0) += scalar; }
ScalarField operator-(const ScalarField& in, double scalar) { ScalarField out(in->clone()); return out -= scalar; }
ScalarField operator-(double scalar, ScalarField&& in) { return (in *= -1.0) += scalar; }
ScalarField operator-(ScalarField&& in, double scalar) { return in -= scalar; }
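
//Usage sketch (illustrative; gradA, gradB hypothetical): axpy performs Y += alpha*X
//and handles a null Y, so contributions can accumulate into an initially null field:
//	ScalarField grad;       //starts null
//	axpy(0.5, gradA, grad); //grad = 0.5*gradA
//	axpy(1.0, gradB, grad); //grad += gradB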


//------------------------------ Dot products and 2-norms ------------------------------

double dot(const ScalarField& X, const ScalarField& Y)
{	return X->scale * Y->scale * callPref(eblas_ddot)(X->nElem, X->dataPref(false), 1, Y->dataPref(false), 1);
}

double dot(const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
{	int N = X->nElem;
	int S2 = X->gInfo.S[2]/2 + 1; //inner dimension
	int S01 = X->gInfo.S[0] * X->gInfo.S[1]; //number of inner dimension slices
	complex complexDot = callPref(eblas_zdotc)(N, X->dataPref(false), 1, Y->dataPref(false), 1);
	complex correction1 = callPref(eblas_zdotc)(S01, X->dataPref(false), S2, Y->dataPref(false), S2);
	complex correction2 = callPref(eblas_zdotc)(S01, X->dataPref(false)+S2-1, S2, Y->dataPref(false)+S2-1, S2);
	if(S2==1) correction2=complex(0,0); //since corrections 1 and 2 target the same slice here
	return X->scale * Y->scale * (2.0*complexDot - correction1 - correction2).real();
}

double nrm2(const ScalarField& X)
{	return fabs(X->scale) * callPref(eblas_dnrm2)(X->nElem, X->dataPref(false), 1);
}

double nrm2(const ScalarFieldTilde& X)
{	int N = X->nElem;
	int S2 = X->gInfo.S[2]/2 + 1; //inner dimension
	int S01 = X->gInfo.S[0] * X->gInfo.S[1]; //number of inner dimension slices
	double complexNorm = callPref(eblas_dznrm2)(N, X->dataPref(false), 1);
	double correction1 = callPref(eblas_dznrm2)(S01, X->dataPref(false), S2);
	double correction2 = callPref(eblas_dznrm2)(S01, X->dataPref(false)+S2-1, S2);
	if(S2==1) correction2=0; //since corrections 1 and 2 target the same slice here
	return fabs(X->scale) * sqrt(2*pow(complexNorm,2) - pow(correction1,2) - pow(correction2,2));
}

double sum(const ScalarField& X)
{	ManagedArray<double> dataScale(&X->scale, 1); //Managed array holding X->scale
	return callPref(eblas_ddot)(X->nElem, X->dataPref(false), 1, dataScale.dataPref(), 0);
}

double sum(const ScalarFieldTilde& X)
{	int N = X->nElem;
	int S2 = X->gInfo.S[2]/2 + 1; //inner dimension
	int S01 = X->gInfo.S[0] * X->gInfo.S[1]; //number of inner dimension slices
	ManagedArray<complex> dataOne(std::vector<complex>(1, complex(1.,0.))); //Managed data storing "1"
	complex complexSum = callPref(eblas_zdotc)(N, X->dataPref(false), 1, dataOne.dataPref(), 0);
	complex correction1 = callPref(eblas_zdotc)(S01, X->dataPref(false), S2, dataOne.dataPref(), 0);
	complex correction2 = callPref(eblas_zdotc)(S01, X->dataPref(false)+S2-1, S2, dataOne.dataPref(), 0);
	if(S2==1) correction2 = 0; //since corrections 1 and 2 target the same slice here
	return X->scale * (2.0*complexSum - correction1 - correction2).real();
}
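
//Note on the reductions above: real fields store a halved reciprocal-space grid of
//S[2]/2+1 planes, where entries with 0 < iG[2] < S[2]/2 also represent their complex
//conjugates at -G; hence the factor of 2, with the iG[2]==0 and Nyquist planes
//(stored in full) counted once via the two corrections.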



double integral(const ScalarField& X)
{	return X->gInfo.dV * sum(X);
}
double integral(const ScalarFieldTilde& X)
{	double XdataZero;
	#ifdef GPU_ENABLED
	cudaMemcpy(&XdataZero, X->dataGpu(false), sizeof(double), cudaMemcpyDeviceToHost);
	#else
	XdataZero = X->data(false)[0].real();
	#endif
	return XdataZero * X->gInfo.detR * X->scale;
}
complex integral(const complexScalarField& X)
{	return X->gInfo.dV * sum(X);
}
complex integral(const complexScalarFieldTilde& X)
{	complex XdataZero;
	#ifdef GPU_ENABLED
	cudaMemcpy(&XdataZero, X->dataGpu(false), sizeof(complex), cudaMemcpyDeviceToHost);
	#else
	XdataZero = X->data(false)[0];
	#endif
	return XdataZero * X->gInfo.detR * X->scale;
}
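
//Note: only the G=0 Fourier component survives integration over the unit cell (all
//other plane waves average to zero), so the tilde-space versions just read the first
//element and scale it by the cell volume detR.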

//------------------------------ Grid conversion utilities ------------------------------

void changeGrid_sub(size_t iStart, size_t iStop, const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
{	THREAD_halfGspaceLoop( changeGrid_calc(iG, Sin, Sout, in, out); )
}
void changeGrid(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
{	threadLaunch(changeGrid_sub, S[0]*S[1]*(1+S[2]/2), S, Sin, Sout, in, out);
}
#ifdef GPU_ENABLED
void changeGrid_gpu(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out);
#endif
ScalarFieldTilde changeGrid(const ScalarFieldTilde& in, const GridInfo& gInfoNew)
{	static StopWatch watch("changeGrid"); watch.start();
	ScalarFieldTilde out; nullToZero(out, gInfoNew);
	assert(gInfoNew.R == in->gInfo.R);
	const vector3<int>& Sin = in->gInfo.S;
	const vector3<int>& Sout = gInfoNew.S;
	vector3<int> Smax; for(int k=0; k<3; k++) Smax[k] = std::max(Sin[k],Sout[k]);
	callPref(changeGrid)(Smax, Sin, Sout, in->dataPref(), out->dataPref());
	watch.stop();
	return out;
}

ScalarField changeGrid(const ScalarField& in, const GridInfo& gInfoNew)
{	return I(changeGrid(J(in), gInfoNew));
}


void changeGridFull_sub(size_t iStart, size_t iStop, const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
{	THREAD_fullGspaceLoop( changeGridFull_calc(iG, Sin, Sout, in, out); )
}
void changeGridFull(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
{	threadLaunch(changeGridFull_sub, S[0]*S[1]*S[2], S, Sin, Sout, in, out);
}
#ifdef GPU_ENABLED
void changeGridFull_gpu(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out);
#endif
complexScalarFieldTilde changeGrid(const complexScalarFieldTilde& in, const GridInfo& gInfoNew)
{	static StopWatch watch("changeGridFull"); watch.start();
	complexScalarFieldTilde out; nullToZero(out, gInfoNew);
	assert(gInfoNew.R == in->gInfo.R);
	const vector3<int>& Sin = in->gInfo.S;
	const vector3<int>& Sout = gInfoNew.S;
	vector3<int> Smax; for(int k=0; k<3; k++) Smax[k] = std::max(Sin[k],Sout[k]);
	callPref(changeGridFull)(Smax, Sin, Sout, in->dataPref(), out->dataPref());
	watch.stop();
	return out;
}

complexScalarField changeGrid(const complexScalarField& in, const GridInfo& gInfoNew)
{	return I(changeGrid(J(in), gInfoNew));
}
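
//Note: changeGrid performs exact Fourier resampling between grids sharing the same
//lattice vectors (asserted above): coefficients present on both grids are copied,
//while those beyond the smaller grid are truncated or zero-filled as appropriate.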


//------------------------------ Initialization utilities ------------------------------

void initRandom(ScalarField& X, double cap)
{	double* Xdata = X->data();
	for(int i=0; i<X->nElem; i++)
		Xdata[i] = Random::normal(0, 1, cap);
}

void initRandomFlat(ScalarField& X)
{	double* Xdata = X->data();
	for(int i=0; i<X->nElem; i++)
		Xdata[i] = Random::uniform();
}

void initGaussianKernel_sub(int i, double Gsq, double expfac, double* X)
{	X[i] = exp(expfac*Gsq);
}
void initGaussianKernel(RealKernel& X, double x0)
{	applyFuncGsq(X.gInfo, initGaussianKernel_sub, -pow(0.5*x0,2), X.data());
}

void initTranslation_sub(size_t iStart, size_t iStop, const vector3<int> S, const vector3<> Gr, complex* X)
{	THREAD_halfGspaceLoop( X[i] = cis(-dot(iG,Gr)); )
}
void initTranslation(ScalarFieldTilde& X, const vector3<>& r)
{	const GridInfo& gInfo = X->gInfo;
	threadLaunch(initTranslation_sub, gInfo.nG, gInfo.S, gInfo.G*r, X->data());
}


void gaussConvolve_sub(size_t iStart, size_t iStop, const vector3<int>& S, const matrix3<>& GGT, complex* data, double sigma)
{	THREAD_halfGspaceLoop( data[i] *= exp(-0.5*sigma*sigma*GGT.metric_length_squared(iG)); )
}
void gaussConvolve(const vector3<int>& S, const matrix3<>& GGT, complex* data, double sigma)
{	threadLaunch(gaussConvolve_sub, S[0]*S[1]*(1+S[2]/2), S, GGT, data, sigma);
}
#ifdef GPU_ENABLED
void gaussConvolve_gpu(const vector3<int>& S, const matrix3<>& GGT, complex* data, double sigma);
#endif
ScalarFieldTilde gaussConvolve(ScalarFieldTilde&& in, double sigma)
{	assert(in);
	callPref(gaussConvolve)(in->gInfo.S, in->gInfo.GGT, in->dataPref(false), sigma);
	return in;
}
ScalarFieldTilde gaussConvolve(const ScalarFieldTilde& in, double sigma)
{	ScalarFieldTilde out(in->clone());
	return gaussConvolve((ScalarFieldTilde&&)out, sigma);
}
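
//Usage sketch (illustrative; n and sigma are hypothetical): smooth a real-space
//density by a Gaussian of width sigma, i.e. multiply by exp(-sigma^2 G^2/2) in
//reciprocal space:
//	ScalarField nSmooth = I(gaussConvolve(J(n), sigma));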

//------------------------------ Debug utilities ------------------------------

void printStats(const ScalarField& X, const char* name, FILE* fp)
{	int N = X->nElem;
	double mean = sum(X)/N;
	double stdDev = sqrt(fabs(dot(X,X)/N - mean*mean));
	double minVal, maxVal; callPref(eblas_capMinMax)(N, X->dataPref(), minVal, maxVal);
	fprintf(fp, "vector %s\t= %.15le +/- %.15le  min: %le  max: %le\n", name, mean, stdDev, minVal, maxVal);
}

//------------------------------ From VectorField.h ------------------------------

inline void gradient_sub(size_t iStart, size_t iStop, const vector3<int> S,
	const matrix3<> G, const complex* Xtilde, vector3<complex*> gradTilde)
{	THREAD_halfGspaceLoop( gradient_calc(i, iG, IS_NYQUIST, G, Xtilde, gradTilde); )
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void gradient_gpu(const vector3<int> S, const matrix3<> G, const complex* Xtilde, vector3<complex*> gradTilde);
#endif
VectorFieldTilde gradient(const ScalarFieldTilde& Xtilde)
{	const GridInfo& gInfo = Xtilde->gInfo;
	VectorFieldTilde gradTilde(gInfo, isGpuEnabled());
	#ifdef GPU_ENABLED
	gradient_gpu(gInfo.S, gInfo.G, Xtilde->dataGpu(), gradTilde.dataGpu());
	#else
	threadLaunch(gradient_sub, gInfo.nG, gInfo.S, gInfo.G, Xtilde->data(), gradTilde.data());
	#endif
	return gradTilde;
}
VectorField gradient(const ScalarField& X) { return I(gradient(J(X))); }


inline void divergence_sub(size_t iStart, size_t iStop, const vector3<int> S,
	const matrix3<> G, vector3<const complex*> Vtilde, complex* divTilde)
{	THREAD_halfGspaceLoop( divergence_calc(i, iG, IS_NYQUIST, G, Vtilde, divTilde); )
}
#ifdef GPU_ENABLED
void divergence_gpu(const vector3<int> S, const matrix3<> G, vector3<const complex*> Vtilde, complex* divTilde);
#endif
ScalarFieldTilde divergence(const VectorFieldTilde& Vtilde)
{	const GridInfo& gInfo = Vtilde[0]->gInfo;
	ScalarFieldTilde divTilde(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	divergence_gpu(gInfo.S, gInfo.G, Vtilde.dataGpu(), divTilde->dataGpu());
	#else
	threadLaunch(divergence_sub, gInfo.nG, gInfo.S, gInfo.G, Vtilde.data(), divTilde->data());
	#endif
	return divTilde;
}
ScalarField divergence(const VectorField& V) { return I(divergence(J(V))); }
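
//Usage sketch (illustrative; n is a hypothetical ScalarField): a Laplacian built from
//these primitives:
//	ScalarField lapN = divergence(gradient(n));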


inline void tensorGradient_sub(size_t iStart, size_t iStop, const vector3<int> S,
	const matrix3<> G, const complex* Xtilde, tensor3<complex*> gradTilde)
{	THREAD_halfGspaceLoop( tensorGradient_calc(i, iG, IS_NYQUIST, G, Xtilde, gradTilde); )
}
#ifdef GPU_ENABLED //implemented in Operators.cu
void tensorGradient_gpu(const vector3<int> S, const matrix3<> G, const complex* Xtilde, tensor3<complex*> gradTilde);
#endif
TensorFieldTilde tensorGradient(const ScalarFieldTilde& Xtilde)
{	const GridInfo& gInfo = Xtilde->gInfo;
	TensorFieldTilde gradTilde(gInfo, isGpuEnabled());
	#ifdef GPU_ENABLED
	tensorGradient_gpu(gInfo.S, gInfo.G, Xtilde->dataGpu(), gradTilde.dataGpu());
	#else
	threadLaunch(tensorGradient_sub, gInfo.nG, gInfo.S, gInfo.G, Xtilde->data(), gradTilde.data());
	#endif
	return gradTilde;
}


inline void tensorDivergence_sub(size_t iStart, size_t iStop, const vector3<int> S,
	const matrix3<> G, tensor3<const complex*> Vtilde, complex* divTilde)
{	THREAD_halfGspaceLoop( tensorDivergence_calc(i, iG, IS_NYQUIST, G, Vtilde, divTilde); )
}
#ifdef GPU_ENABLED
void tensorDivergence_gpu(const vector3<int> S, const matrix3<> G, tensor3<const complex*> Vtilde, complex* divTilde);
#endif
ScalarFieldTilde tensorDivergence(const TensorFieldTilde& Vtilde)
{	const GridInfo& gInfo = Vtilde[0]->gInfo;
	ScalarFieldTilde divTilde(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
	#ifdef GPU_ENABLED
	tensorDivergence_gpu(gInfo.S, gInfo.G, Vtilde.dataGpu(), divTilde->dataGpu());
	#else
	threadLaunch(tensorDivergence_sub, gInfo.nG, gInfo.S, gInfo.G, Vtilde.data(), divTilde->data());
	#endif
	return divTilde;
}