1 /*-------------------------------------------------------------------
2 Copyright 2011 Ravishankar Sundararaman
3
4 This file is part of JDFTx.
5
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19
20 #include <core/Operators.h>
21 #include <core/Operators_internal.h>
22 #include <core/GridInfo.h>
23 #include <core/VectorField.h>
24 #include <core/ScalarFieldArray.h>
25 #include <core/Random.h>
26 #include <string.h>
27
28 //------------------------------ Conversion operators ------------------------------
29
30
Real(const complexScalarField & C)31 ScalarField Real(const complexScalarField& C)
32 { const GridInfo& gInfo = C->gInfo;
33 ScalarField R; nullToZero(R, gInfo);
34 callPref(eblas_daxpy)(gInfo.nr, C->scale, (const double*)C->dataPref(false), 2, R->dataPref(false), 1);
35 R->scale = 1;
36 return R;
37 }
38
RealG_sub(size_t iStart,size_t iStop,const vector3<int> S,const complex * vFull,complex * vHalf,double scaleFac)39 void RealG_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac)
40 { THREAD_halfGspaceLoop( RealG_calc(i, iG, S, vFull, vHalf, scaleFac); )
41 }
42 #ifdef GPU_ENABLED
43 void RealG_gpu(const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac);
44 #endif
Real(const complexScalarFieldTilde & full)45 ScalarFieldTilde Real(const complexScalarFieldTilde& full)
46 { const GridInfo& gInfo = full->gInfo;
47 ScalarFieldTilde half = ScalarFieldTildeData::alloc(gInfo, isGpuEnabled());
48 #ifdef GPU_ENABLED
49 RealG_gpu(gInfo.S, full->dataGpu(false), half->dataGpu(false), full->scale);
50 #else
51 threadLaunch(RealG_sub, gInfo.nG, gInfo.S, full->data(false), half->data(false), full->scale);
52 #endif
53 half->scale = 1;
54 return half;
55 }
56
Imag(const complexScalarField & C)57 ScalarField Imag(const complexScalarField& C)
58 { const GridInfo& gInfo = C->gInfo;
59 ScalarField I; nullToZero(I, gInfo);
60 callPref(eblas_daxpy)(gInfo.nr, C->scale, ((const double*)C->dataPref(false))+1, 2, I->dataPref(false), 1);
61 I->scale = 1;
62 return I;
63 }
64
ImagG_sub(size_t iStart,size_t iStop,const vector3<int> S,const complex * vFull,complex * vHalf,double scaleFac)65 void ImagG_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac)
66 { THREAD_halfGspaceLoop( ImagG_calc(i, iG, S, vFull, vHalf, scaleFac); )
67 }
68 #ifdef GPU_ENABLED
69 void ImagG_gpu(const vector3<int> S, const complex* vFull, complex* vHalf, double scaleFac);
70 #endif
Imag(const complexScalarFieldTilde & full)71 ScalarFieldTilde Imag(const complexScalarFieldTilde& full)
72 { const GridInfo& gInfo = full->gInfo;
73 ScalarFieldTilde half = ScalarFieldTildeData::alloc(gInfo, isGpuEnabled());
74 #ifdef GPU_ENABLED
75 ImagG_gpu(gInfo.S, full->dataGpu(false), half->dataGpu(false), full->scale);
76 #else
77 threadLaunch(ImagG_sub, gInfo.nG, gInfo.S, full->data(false), half->data(false), full->scale);
78 #endif
79 half->scale = 1;
80 return half;
81 }
82
Complex(const ScalarField & R)83 complexScalarField Complex(const ScalarField& R)
84 { const GridInfo& gInfo = R->gInfo;
85 complexScalarField C; nullToZero(C, gInfo);
86 callPref(eblas_daxpy)(gInfo.nr, R->scale, R->dataPref(false), 1, (double*)C->dataPref(false), 2);
87 C->scale = 1;
88 return C;
89 }
90
Complex(const ScalarField & re,const ScalarField & im)91 complexScalarField Complex(const ScalarField& re, const ScalarField& im)
92 { const GridInfo& gInfo = re->gInfo;
93 complexScalarField C; nullToZero(C, gInfo);
94 callPref(eblas_daxpy)(gInfo.nr, re->scale, re->dataPref(false), 1, (double*)(C->dataPref(false))+0, 2);
95 callPref(eblas_daxpy)(gInfo.nr, im->scale, im->dataPref(false), 1, (double*)(C->dataPref(false))+1, 2);
96 C->scale = 1;
97 return C;
98 }
99
ComplexG_sub(size_t iStart,size_t iStop,const vector3<int> S,const complex * vHalf,complex * vFull,double scaleFac)100 void ComplexG_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* vHalf, complex* vFull, double scaleFac)
101 { THREAD_halfGspaceLoop( ComplexG_calc(i, iG, S, vHalf, vFull, scaleFac); )
102 }
103 #ifdef GPU_ENABLED
104 void ComplexG_gpu(const vector3<int> S, const complex* vHalf, complex* vFull, double scaleFac);
105 #endif
Complex(const ScalarFieldTilde & half)106 complexScalarFieldTilde Complex(const ScalarFieldTilde& half)
107 { const GridInfo& gInfo = half->gInfo;
108 complexScalarFieldTilde full = complexScalarFieldTildeData::alloc(gInfo, isGpuEnabled());
109 #ifdef GPU_ENABLED
110 ComplexG_gpu(gInfo.S, half->dataGpu(false), full->dataGpu(false), half->scale);
111 #else
112 threadLaunch(ComplexG_sub, gInfo.nG, gInfo.S, half->data(false), full->data(false), half->scale);
113 #endif
114 full->scale = 1;
115 return full;
116 }
117
118
119 //------------------------------ Linear Unary operators ------------------------------
120
O(const ScalarFieldTilde & in)121 ScalarFieldTilde O(const ScalarFieldTilde& in) { return in * in->gInfo.detR; }
O(ScalarFieldTilde && in)122 ScalarFieldTilde O(ScalarFieldTilde&& in) { return in *= in->gInfo.detR; }
O(const complexScalarFieldTilde & in)123 complexScalarFieldTilde O(const complexScalarFieldTilde& in) { return in * in->gInfo.detR; }
O(complexScalarFieldTilde && in)124 complexScalarFieldTilde O(complexScalarFieldTilde&& in) { return in *= in->gInfo.detR; }
125
126
127 //Forward transform
I(ScalarFieldTilde && in,int nThreads)128 ScalarField I(ScalarFieldTilde&& in, int nThreads)
129 { //c2r transforms may destroy input, but this input can be destroyed
130 ScalarField out(ScalarFieldData::alloc(in->gInfo, isGpuEnabled()));
131 #ifdef GPU_ENABLED
132 cufftExecZ2D(in->gInfo.planZ2D, (double2*)in->dataGpu(false), out->dataGpu(false));
133 #else
134 if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
135 fftw_execute_dft_c2r(in->gInfo.getPlan(GridInfo::PlanCtoR, nThreads),
136 (fftw_complex*)in->data(false), out->data(false));
137 #endif
138 out->scale = in->scale;
139 return out;
140 }
I(const ScalarFieldTilde & in,int nThreads)141 ScalarField I(const ScalarFieldTilde& in, int nThreads)
142 { //c2r transforms may destroy input, hence copy input (and let that copy be destroyed above)
143 return I(in->clone(), nThreads);
144 }
I(const complexScalarFieldTilde & in,int nThreads)145 complexScalarField I(const complexScalarFieldTilde& in, int nThreads)
146 { complexScalarField out(complexScalarFieldData::alloc(in->gInfo, isGpuEnabled()));
147 #ifdef GPU_ENABLED
148 cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)out->dataGpu(false), CUFFT_INVERSE);
149 #else
150 if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
151 fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanInverse, nThreads),
152 (fftw_complex*)in->data(false), (fftw_complex*)out->data(false));
153 #endif
154 out->scale = in->scale;
155 return out;
156 }
I(complexScalarFieldTilde && in,int nThreads)157 complexScalarField I(complexScalarFieldTilde&& in, int nThreads)
158 { //Destructible input (transform in place):
159 #ifdef GPU_ENABLED
160 cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)in->dataGpu(false), CUFFT_INVERSE);
161 #else
162 if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
163 fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanInverseInPlace, nThreads),
164 (fftw_complex*)in->data(false), (fftw_complex*)in->data(false));
165 #endif
166 return std::static_pointer_cast<complexScalarFieldData>(std::static_pointer_cast<FieldData<complex>>(in));
167 }
168
169 //Forward transform h.c.
Idag(const ScalarField & in,int nThreads)170 ScalarFieldTilde Idag(const ScalarField& in, int nThreads)
171 { //r2c transform does not destroy input (no backing up needed)
172 ScalarFieldTilde out(ScalarFieldTildeData::alloc(in->gInfo, isGpuEnabled()));
173 #ifdef GPU_ENABLED
174 cufftExecD2Z(in->gInfo.planD2Z, in->dataGpu(false), (double2*)out->dataGpu(false));
175 #else
176 if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
177 fftw_execute_dft_r2c(in->gInfo.getPlan(GridInfo::PlanRtoC, nThreads),
178 in->data(false), (fftw_complex*)out->data(false));
179 #endif
180 out->scale = in->scale;
181 return out;
182 }
Idag(const complexScalarField & in,int nThreads)183 complexScalarFieldTilde Idag(const complexScalarField& in, int nThreads)
184 { complexScalarFieldTilde out(complexScalarFieldTildeData::alloc(in->gInfo, isGpuEnabled()));
185 #ifdef GPU_ENABLED
186 cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)out->dataGpu(false), CUFFT_FORWARD);
187 #else
188 if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
189 fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanForward, nThreads),
190 (fftw_complex*)in->data(false), (fftw_complex*)out->data(false));
191 #endif
192 out->scale = in->scale;
193 return out;
194 }
Idag(complexScalarField && in,int nThreads)195 complexScalarFieldTilde Idag(complexScalarField&& in, int nThreads)
196 { //Destructible input (transform in place):
197 #ifdef GPU_ENABLED
198 cufftExecZ2Z(in->gInfo.planZ2Z, (double2*)in->dataGpu(false), (double2*)in->dataGpu(false), CUFFT_FORWARD);
199 #else
200 if(!nThreads) nThreads = shouldThreadOperators() ? nProcsAvailable : 1;
201 fftw_execute_dft(in->gInfo.getPlan(GridInfo::PlanForwardInPlace, nThreads),
202 (fftw_complex*)in->data(false), (fftw_complex*)in->data(false));
203 #endif
204 return std::static_pointer_cast<complexScalarFieldTildeData>(std::static_pointer_cast<FieldData<complex>>(in));
205 }
206
207 //Reverse transform (same as Idag upto the normalization factor)
J(const ScalarField & in,int nThreads)208 ScalarFieldTilde J(const ScalarField& in, int nThreads) { return (1.0/in->gInfo.nr)*Idag(in, nThreads); }
J(const complexScalarField & in,int nThreads)209 complexScalarFieldTilde J(const complexScalarField& in, int nThreads) { return (1.0/in->gInfo.nr)*Idag(in, nThreads); }
J(complexScalarField && in,int nThreads)210 complexScalarFieldTilde J(complexScalarField&& in, int nThreads) { return Idag((complexScalarField&&)(in *= 1.0/in->gInfo.nr), nThreads); }
211
212 //Reverse transform h.c. (same as I upto the normalization factor)
Jdag(const ScalarFieldTilde & in,int nThreads)213 ScalarField Jdag(const ScalarFieldTilde& in, int nThreads) { return (1.0/in->gInfo.nr)*I(in, nThreads); }
Jdag(ScalarFieldTilde && in,int nThreads)214 ScalarField Jdag(ScalarFieldTilde&& in, int nThreads) { return (1.0/in->gInfo.nr)*I(in, nThreads); }
Jdag(const complexScalarFieldTilde & in,int nThreads)215 complexScalarField Jdag(const complexScalarFieldTilde& in, int nThreads) { return (1.0/in->gInfo.nr)*I(in, nThreads); }
Jdag(complexScalarFieldTilde && in,int nThreads)216 complexScalarField Jdag(complexScalarFieldTilde&& in, int nThreads) { return I((complexScalarFieldTilde&&)(in *= 1.0/in->gInfo.nr), nThreads); }
217
JdagOJ(const ScalarField & in)218 ScalarField JdagOJ(const ScalarField& in) { return in * in->gInfo.dV; }
JdagOJ(ScalarField && in)219 ScalarField JdagOJ(ScalarField&& in) { return in *= in->gInfo.dV; }
JdagOJ(const complexScalarField & in)220 complexScalarField JdagOJ(const complexScalarField& in) { return in * in->gInfo.dV; }
JdagOJ(complexScalarField && in)221 complexScalarField JdagOJ(complexScalarField&& in) { return in *= in->gInfo.dV; }
222
223
224
L_sub(int i,double Gsq,complex * v)225 inline void L_sub(int i, double Gsq, complex* v)
226 { v[i] *= Gsq;
227 }
228 #ifdef GPU_ENABLED //implemented in Operators.cu
229 void L_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
230 #endif
L(ScalarFieldTilde && in)231 ScalarFieldTilde L(ScalarFieldTilde&& in)
232 { const GridInfo& gInfo = in->gInfo;
233 in *= -gInfo.detR;
234 #ifdef GPU_ENABLED
235 L_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
236 #else
237 applyFuncGsq(gInfo, L_sub, in->data(false));
238 #endif
239 return in;
240 }
L(const ScalarFieldTilde & in)241 ScalarFieldTilde L(const ScalarFieldTilde& in) { return L(in->clone()); }
242
Linv_sub(int i,double Gsq,complex * v)243 inline void Linv_sub(int i, double Gsq, complex* v)
244 { if(i==0) v[i]=0.0;
245 else v[i] /= Gsq;
246 }
247 #ifdef GPU_ENABLED //implemented in Operators.cu
248 void Linv_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
249 #endif
Linv(ScalarFieldTilde && in)250 ScalarFieldTilde Linv(ScalarFieldTilde&& in)
251 { const GridInfo& gInfo = in->gInfo;
252 in *= (-1.0/gInfo.detR);
253 #ifdef GPU_ENABLED
254 Linv_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
255 #else
256 applyFuncGsq(gInfo, Linv_sub, in->data(false));
257 #endif
258 return in;
259 }
Linv(const ScalarFieldTilde & in)260 ScalarFieldTilde Linv(const ScalarFieldTilde& in) { return Linv(in->clone()); }
261
262
fullL_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> GGT,complex * v)263 void fullL_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<> GGT, complex* v)
264 { THREAD_fullGspaceLoop( v[i] *= GGT.metric_length_squared(iG); )
265 }
266 #ifdef GPU_ENABLED //implemented in Operators.cu
267 void fullL_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
268 #endif
L(complexScalarFieldTilde && in)269 complexScalarFieldTilde L(complexScalarFieldTilde&& in)
270 { const GridInfo& gInfo = in->gInfo;
271 in *= -gInfo.detR;
272 #ifdef GPU_ENABLED
273 fullL_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
274 #else
275 threadLaunch(fullL_sub, gInfo.nr, gInfo.S, gInfo.GGT, in->data(false));
276 #endif
277 return in;
278 }
L(const complexScalarFieldTilde & in)279 complexScalarFieldTilde L(const complexScalarFieldTilde& in) { return L(in->clone()); }
280
fullLinv_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> GGT,complex * v)281 void fullLinv_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<> GGT, complex* v)
282 { THREAD_fullGspaceLoop( v[i] *= i ? (1.0/GGT.metric_length_squared(iG)) : 0.0; )
283 }
284 #ifdef GPU_ENABLED //implemented in Operators.cu
285 void fullLinv_gpu(const vector3<int> S, const matrix3<> GGT, complex* v);
286 #endif
Linv(complexScalarFieldTilde && in)287 complexScalarFieldTilde Linv(complexScalarFieldTilde&& in)
288 { const GridInfo& gInfo = in->gInfo;
289 in *= (-1.0/gInfo.detR);
290 #ifdef GPU_ENABLED
291 fullLinv_gpu(gInfo.S, gInfo.GGT, in->dataGpu(false));
292 #else
293 threadLaunch(fullLinv_sub, gInfo.nr, gInfo.S, gInfo.GGT, in->data(false));
294 #endif
295 return in;
296 }
Linv(const complexScalarFieldTilde & in)297 complexScalarFieldTilde Linv(const complexScalarFieldTilde& in) { return Linv(in->clone()); }
298
299
Lstress_thread(size_t iStart,size_t iStop,const vector3<int> S,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)300 void Lstress_thread(size_t iStart, size_t iStop, const vector3<int> S, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
301 { THREAD_halfGspaceLoop
302 ( double weight = ((iG[2]==0) or (2*iG[2]==S[2])) ? 1 : 2; //weight factor for points in reduced reciprocal space of real scalar fields
303 grad_RRT[i] = (weight * real(X[i].conj() * Y[i])) * outer(vector3<>(iG));
304 )
305 }
Lstress(const vector3<int> S,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)306 inline void Lstress(const vector3<int> S, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
307 { threadLaunch(Lstress_thread, S[0]*S[1]*(S[2]/2+1), S, X, Y, grad_RRT);
308 }
309 #ifdef GPU_ENABLED //implemented in Operators.cu
310 void Lstress_gpu(const vector3<int> S, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT);
311 #endif
Lstress(const ScalarFieldTilde & X,const ScalarFieldTilde & Y)312 matrix3<> Lstress(const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
313 { const GridInfo& gInfo = X->gInfo;
314 ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
315 callPref(Lstress)(gInfo.S, X->dataPref(), Y->dataPref(), result.dataPref());
316 matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
317 return (2.*gInfo.detR) * (gInfo.GT * resultSum * gInfo.G);
318 }
319
LinvStress_thread(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> & GGT,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)320 void LinvStress_thread(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
321 { THREAD_halfGspaceLoop
322 ( double weight = ((iG[2]==0) or (2*iG[2]==S[2])) ? 1 : 2; //weight factor for points in reduced reciprocal space of real scalar fields
323 double Gsq = GGT.metric_length_squared(iG);
324 double GsqInv = Gsq ? 1./Gsq : 0;
325 grad_RRT[i] = (weight * real(X[i].conj() * Y[i]) * (-GsqInv*GsqInv)) * outer(vector3<>(iG));
326 )
327 }
LinvStress(const vector3<int> S,const matrix3<> & GGT,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)328 inline void LinvStress(const vector3<int> S, const matrix3<>& GGT, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
329 { threadLaunch(LinvStress_thread, S[0]*S[1]*(S[2]/2+1), S, GGT, X, Y, grad_RRT);
330 }
331 #ifdef GPU_ENABLED //implemented in Operators.cu
332 void LinvStress_gpu(const vector3<int> S, const matrix3<>& GGT, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT);
333 #endif
LinvStress(const ScalarFieldTilde & X,const ScalarFieldTilde & Y)334 matrix3<> LinvStress(const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
335 { const GridInfo& gInfo = X->gInfo;
336 ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
337 callPref(LinvStress)(gInfo.S, gInfo.GGT, X->dataPref(), Y->dataPref(), result.dataPref());
338 matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
339 return (2.*gInfo.detR) * (gInfo.GT * resultSum * gInfo.G);
340 }
341
342
zeroNyquist_sub(size_t iStart,size_t iStop,const vector3<int> S,Scalar * data)343 template<typename Scalar> void zeroNyquist_sub(size_t iStart, size_t iStop, const vector3<int> S, Scalar* data)
344 { THREAD_halfGspaceLoop( if(IS_NYQUIST) data[i] = Scalar(0.0); )
345 }
zeroNyquist(RealKernel & K)346 void zeroNyquist(RealKernel& K)
347 { threadLaunch(zeroNyquist_sub<double>, K.gInfo.nG, K.gInfo.S, K.data());
348 }
zeroNyquist(ScalarFieldTilde & Gptr)349 void zeroNyquist(ScalarFieldTilde& Gptr)
350 { const GridInfo& gInfo = Gptr->gInfo;
351 threadLaunch(zeroNyquist_sub<complex>, gInfo.nG, gInfo.S, Gptr->data());
352 }
zeroNyquist(ScalarField & Rptr)353 void zeroNyquist(ScalarField& Rptr) { ScalarFieldTilde Rtilde=J(Rptr); zeroNyquist(Rtilde); Rptr = I((ScalarFieldTilde&&)Rtilde); }
354
removePhase(size_t N,complex * data,double & meanPhase,double & sigmaPhase,double & rmsImagErr)355 void removePhase(size_t N, complex* data, double& meanPhase, double& sigmaPhase, double& rmsImagErr)
356 { //Find mean phase
357 double w0=0.0, r1=0.0, r2=0.0, i1=0.0, i2=0.0; //moments of normalized real and imaginary parts
358 for(size_t i=0; i<N; i++)
359 { complex c = data[i]*data[i];
360 double w = abs(c); //weight
361 if(w > 1e-300)
362 { w0 += w;
363 r1 += c.real();
364 i1 += c.imag();
365 r2 += c.real() * c.real() / w;
366 i2 += c.imag() * c.imag() / w;
367 }
368 }
369 double rMean=r1/w0, rSigma=sqrt(std::max(0.,r2/w0-pow(rMean,2)));
370 double iMean=i1/w0, iSigma=sqrt(std::max(0.,i2/w0-pow(iMean,2)));
371 meanPhase = 0.5*atan2(iMean, rMean);
372 sigmaPhase = 0.5*hypot(iMean*rSigma, rMean*iSigma)/(pow(rMean,2) + pow(iMean,2));
373
374 //Remove phase:
375 complex phaseCompensate = cis(-meanPhase);
376 double imSqSum=0.0, reSqSum=0.0;
377 for(size_t i=0; i<N; i++)
378 { complex& c = data[i];
379 c *= phaseCompensate;
380 reSqSum += pow(c.real(),2);
381 imSqSum += pow(c.imag(),2);
382 c = c.real();
383 }
384 rmsImagErr = sqrt(imSqSum/(imSqSum+reSqSum));
385 }
386
387
388 //------------------------------ Spatial gradients of scalar fields ---------------------------------
389
390 //first cartesian derivative
D_sub(size_t iStart,size_t iStop,const vector3<int> S,const complex * in,complex * out,vector3<> Ge)391 void D_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* in, complex* out, vector3<> Ge)
392 { THREAD_halfGspaceLoop( D_calc(i, iG, in, out, Ge); )
393 }
394 #ifdef GPU_ENABLED
395 void D_gpu(const vector3<int> S, const complex* in, complex* out, vector3<> Ge);
396 #endif
D(const ScalarFieldTilde & in,int iDir)397 ScalarFieldTilde D(const ScalarFieldTilde& in, int iDir)
398 { const GridInfo& gInfo = in->gInfo;
399 ScalarFieldTilde out(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
400 #ifdef GPU_ENABLED
401 D_gpu(gInfo.S, in->dataGpu(), out->dataGpu(), gInfo.G.column(iDir));
402 #else
403 threadLaunch(D_sub, gInfo.nG, gInfo.S, in->data(), out->data(), gInfo.G.column(iDir));
404 #endif
405 return out;
406 }
407
408 //second cartesian derivative
DD_sub(size_t iStart,size_t iStop,const vector3<int> S,const complex * in,complex * out,vector3<> Ge1,vector3<> Ge2)409 void DD_sub(size_t iStart, size_t iStop, const vector3<int> S, const complex* in, complex* out, vector3<> Ge1, vector3<> Ge2)
410 { THREAD_halfGspaceLoop( DD_calc(i, iG, in, out, Ge1, Ge2); )
411 }
412 #ifdef GPU_ENABLED
413 void DD_gpu(const vector3<int> S, const complex* in, complex* out, vector3<> g, vector3<> q);
414 #endif
DD(const ScalarFieldTilde & in,int iDir,int jDir)415 ScalarFieldTilde DD(const ScalarFieldTilde& in, int iDir, int jDir)
416 { const GridInfo& gInfo = in->gInfo;
417 ScalarFieldTilde out(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
418 #ifdef GPU_ENABLED
419 DD_gpu(gInfo.S, in->dataGpu(), out->dataGpu(), gInfo.G.column(iDir), gInfo.G.column(jDir));
420 #else
421 threadLaunch(DD_sub, gInfo.nG, gInfo.S, in->data(), out->data(), gInfo.G.column(iDir), gInfo.G.column(jDir));
422 #endif
423 return out;
424 }
425
426
lGradient_sub(size_t iStart,size_t iStop,const vector3<int> & S,const complex * in,const array<complex *,2* l+1> & out,const matrix3<> & G)427 template<int l> void lGradient_sub(size_t iStart, size_t iStop, const vector3<int>& S, const complex* in, const array<complex*, 2*l+1>& out, const matrix3<>& G)
428 { THREAD_halfGspaceLoop( lGradient_calc<l>(i, iG, IS_NYQUIST, in, out, G); )
429 }
lGradient(const vector3<int> & S,const complex * in,array<complex *,2* l+1> out,const matrix3<> & G)430 template<int l> void lGradient(const vector3<int>& S, const complex* in, array<complex*, 2*l+1> out, const matrix3<>& G)
431 { threadLaunch(lGradient_sub<l>, S[0]*S[1]*(S[2]/2+1), S, in, out, G);
432 }
lGradient(const vector3<int> & S,const complex * in,std::vector<complex * > out,int l,const matrix3<> & G)433 void lGradient(const vector3<int>& S, const complex* in, std::vector<complex*> out, int l, const matrix3<>& G)
434 { SwitchTemplate_l(l, lGradient, (S, in, out, G))
435 }
436 #ifdef GPU_ENABLED
437 void lGradient_gpu(const vector3<int>& S, const complex* in, std::vector<complex*> out, int l, const matrix3<>& G);
438 #endif
439
lGradient(const ScalarFieldTilde & in,int l)440 ScalarFieldTildeArray lGradient(const ScalarFieldTilde& in, int l)
441 { ScalarFieldTildeArray out; nullToZero(out, in->gInfo, 2*l+1);
442 callPref(lGradient)(in->gInfo.S, in->dataPref(), dataPref(out), l, in->gInfo.G);
443 return out;
444 }
445
lDivergence_sub(size_t iStart,size_t iStop,const vector3<int> & S,const array<const complex *,2* l+1> & in,complex * out,const matrix3<> & G)446 template<int l> void lDivergence_sub(size_t iStart, size_t iStop, const vector3<int>& S, const array<const complex*,2*l+1>& in, complex* out, const matrix3<>& G)
447 { THREAD_halfGspaceLoop( lDivergence_calc<l>(i, iG, IS_NYQUIST, in, out, G); )
448 }
lDivergence(const vector3<int> & S,array<const complex *,2* l+1> in,complex * out,const matrix3<> & G)449 template<int l> void lDivergence(const vector3<int>& S, array<const complex*,2*l+1> in, complex* out, const matrix3<>& G)
450 { threadLaunch(lDivergence_sub<l>, S[0]*S[1]*(S[2]/2+1), S, in, out, G);
451 }
lDivergence(const vector3<int> & S,const std::vector<const complex * > & in,complex * out,int l,const matrix3<> & G)452 void lDivergence(const vector3<int>& S, const std::vector<const complex*>& in, complex* out, int l, const matrix3<>& G)
453 { SwitchTemplate_l(l, lDivergence, (S, in, out, G))
454 }
455 #ifdef GPU_ENABLED
456 void lDivergence_gpu(const vector3<int>& S, const std::vector<const complex*>& in, complex* out, int l, const matrix3<>& G);
457 #endif
458
lDivergence(const ScalarFieldTildeArray & in,int l)459 ScalarFieldTilde lDivergence(const ScalarFieldTildeArray& in, int l)
460 { assert(int(in.size()) == 2*l+1);
461 ScalarFieldTilde out; nullToZero(out, in[0]->gInfo);
462 callPref(lDivergence)(in[0]->gInfo.S, constDataPref(in), out->dataPref(), l, in[0]->gInfo.G);
463 return out;
464 }
465
466
lGradientStress_sub(size_t iStart,size_t iStop,const vector3<int> & S,const matrix3<> & G,const RadialFunctionG & w,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)467 template<int l, int m> void lGradientStress_sub(size_t iStart, size_t iStop, const vector3<int>& S, const matrix3<>& G,
468 const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
469 {
470 const complex lPhase = cis(l*0.5*M_PI);
471 THREAD_halfGspaceLoop(
472 double weight = real(conj(X[i]) * Y[i] * lPhase) * (IS_NYQUIST
473 ? 0 //drop nyquist frequency contributions
474 : (iG[2]==0 ? 1 : 2) ); //reciprocal space weights
475 grad_RRT[i] = weight * lGradientStress_calc<l*(l+1)+m>(iG, G, w);
476 )
477 }
lGradientStress(const vector3<int> & S,const matrix3<> & G,const RadialFunctionG & w,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)478 template<int l, int m> void lGradientStress(const vector3<int>& S, const matrix3<>& G, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
479 { threadLaunch(lGradientStress_sub<l,m>, S[0]*S[1]*(S[2]/2+1), S, G, w, X, Y, grad_RRT);
480 }
lGradientStress(const vector3<int> & S,const matrix3<> & G,const RadialFunctionG & w,const complex * X,const complex * Y,int l,int m,symmetricMatrix3<> * grad_RRT)481 void lGradientStress(const vector3<int>& S, const matrix3<>& G, const RadialFunctionG& w, const complex* X, const complex* Y, int l, int m, symmetricMatrix3<>* grad_RRT)
482 { SwitchTemplate_lm(l, m, lGradientStress, (S, G, w, X, Y, grad_RRT))
483 }
484 #ifdef GPU_ENABLED
485 void lGradientStress_gpu(const vector3<int>& S, const matrix3<>& G, const RadialFunctionG& w, const complex* X, const complex* Y, int l, int m, symmetricMatrix3<>* grad_RRT);
486 #endif
lGradientStress(const RadialFunctionG & w,const ScalarFieldTilde & X,const ScalarFieldTilde & Y,int l,int m)487 matrix3<> lGradientStress(const RadialFunctionG& w, const ScalarFieldTilde& X, const ScalarFieldTilde& Y, int l, int m)
488 { const GridInfo& gInfo = X->gInfo;
489 ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
490 callPref(lGradientStress)(gInfo.S, gInfo.G, w, X->dataPref(), Y->dataPref(), l, m, result.dataPref());
491 matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
492 return (-gInfo.detR) * resultSum;
493 }
494
495
multiplyBlochPhase_sub(size_t iStart,size_t iStop,const vector3<int> & S,const vector3<> & invS,complex * v,const vector3<> & k)496 void multiplyBlochPhase_sub(size_t iStart, size_t iStop,
497 const vector3<int>& S, const vector3<>& invS, complex* v, const vector3<>& k)
498 { THREAD_rLoop( v[i] *= blochPhase_calc(iv, invS, k); )
499 }
500 #ifdef GPU_ENABLED
501 void multiplyBlochPhase_gpu(const vector3<int>& S, const vector3<>& invS, complex* v, const vector3<>& k);
502 #endif
multiplyBlochPhase(complexScalarField & v,const vector3<> & k)503 void multiplyBlochPhase(complexScalarField& v, const vector3<>& k)
504 { const GridInfo& gInfo = v->gInfo;
505 vector3<> invS(1./gInfo.S[0], 1./gInfo.S[1], 1./gInfo.S[2]);
506 #ifdef GPU_ENABLED
507 multiplyBlochPhase_gpu(gInfo.S, invS, v->dataGpu(), k);
508 #else
509 threadLaunch(multiplyBlochPhase_sub, gInfo.nr, gInfo.S, invS, v->data(), k);
510 #endif
511 }
512
513 //------ RadialFunction related operators ------
514
radialFunction_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> & GGT,complex * F,const RadialFunctionG & f,vector3<> r0)515 void radialFunction_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT,
516 complex* F, const RadialFunctionG& f, vector3<> r0 )
517 { THREAD_halfGspaceLoop( F[i] = radialFunction_calc(iG, GGT, f, r0); )
518 }
519 #ifdef GPU_ENABLED
520 void radialFunction_gpu(const vector3<int> S, const matrix3<>& GGT,
521 complex* F, const RadialFunctionG& f, vector3<> r0);
522 #endif
radialFunctionG(const GridInfo & gInfo,const RadialFunctionG & f,vector3<> r0)523 ScalarFieldTilde radialFunctionG(const GridInfo& gInfo, const RadialFunctionG& f, vector3<> r0)
524 {
525 ScalarFieldTilde F(ScalarFieldTildeData::alloc(gInfo,isGpuEnabled()));
526 #ifdef GPU_ENABLED
527 radialFunction_gpu(gInfo.S, gInfo.GGT, F->dataGpu(), f, r0);
528 #else
529 threadLaunch(radialFunction_sub, gInfo.nG, gInfo.S, gInfo.GGT, F->data(), f, r0);
530 #endif
531 return F;
532 }
533
radialFunction(const GridInfo & gInfo,const RadialFunctionG & f,vector3<> r0)534 ScalarField radialFunction(const GridInfo& gInfo, const RadialFunctionG& f, vector3<> r0)
535 {
536 ScalarFieldTilde F = radialFunctionG(gInfo, f, r0);
537 return (1.0/gInfo.detR) * I(F);
538 }
539
radialFunctionG(const RadialFunctionG & f,RealKernel & Kernel)540 void radialFunctionG(const RadialFunctionG& f, RealKernel& Kernel)
541 {
542 ScalarFieldTilde F = radialFunctionG(Kernel.gInfo, f, vector3<>(0,0,0));
543 const complex* FData = F->data(); //put F into Kernel
544 double* KernelData = Kernel.data();
545 for(int i=0; i<Kernel.gInfo.nG; i++)
546 KernelData[i] = FData[i].real();
547 }
548
549
radialFunctionMultiply_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> & GGT,complex * in,const RadialFunctionG & f)550 void radialFunctionMultiply_sub(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT,
551 complex* in, const RadialFunctionG& f)
552 { THREAD_halfGspaceLoop( in[i] *= f(sqrt(GGT.metric_length_squared(iG))); )
553 }
554 #ifdef GPU_ENABLED
555 void radialFunctionMultiply_gpu(const vector3<int> S, const matrix3<>& GGT, complex* in, const RadialFunctionG& f);
556 #endif
557
operator *(const RadialFunctionG & f,ScalarFieldTilde && in)558 ScalarFieldTilde operator*(const RadialFunctionG& f, ScalarFieldTilde&& in)
559 { const GridInfo& gInfo = in->gInfo;
560 #ifdef GPU_ENABLED
561 radialFunctionMultiply_gpu(gInfo.S, gInfo.GGT, in->dataGpu(), f);
562 #else
563 threadLaunch(radialFunctionMultiply_sub, gInfo.nG, gInfo.S, gInfo.GGT, in->data(), f);
564 #endif
565 return in;
566 }
567
operator *(const RadialFunctionG & f,const ScalarFieldTilde & in)568 ScalarFieldTilde operator*(const RadialFunctionG& f, const ScalarFieldTilde& in)
569 { ScalarFieldTilde out(in->clone()); //destructible copy
570 return f * ((ScalarFieldTilde&&)out);
571 }
572
operator *(const RadialFunctionG & f,VectorFieldTilde && in)573 VectorFieldTilde operator*(const RadialFunctionG& f, VectorFieldTilde&& in)
574 { for(int k=0; k<3; k++) in[k] = f * (ScalarFieldTilde&&)in[k];
575 return in;
576 }
577
operator *(const RadialFunctionG & f,const VectorFieldTilde & in)578 VectorFieldTilde operator*(const RadialFunctionG& f, const VectorFieldTilde& in)
579 { VectorFieldTilde out;
580 for(int k=0; k<3; k++) out[k] = f * in[k];
581 return out;
582 }
583
584 //Stress due to dot(X,O(w*Y))
convolveStress_thread(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> & GGT,const RadialFunctionG & w,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)585 void convolveStress_thread(size_t iStart, size_t iStop, const vector3<int> S, const matrix3<>& GGT, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
586 { THREAD_halfGspaceLoop
587 ( double weight = ((iG[2]==0) or (2*iG[2]==S[2])) ? 1 : 2; //weight factor for points in reduced reciprocal space of real scalar fields
588 double G = sqrt(GGT.metric_length_squared(iG));
589 double minus_wPrime_by_G = G ? (-w.deriv(G)/G) : 0.;
590 grad_RRT[i] = (weight * minus_wPrime_by_G * real(X[i].conj() * Y[i])) * outer(vector3<>(iG));
591 )
592 }
convolveStress(const vector3<int> S,const matrix3<> & GGT,const RadialFunctionG & w,const complex * X,const complex * Y,symmetricMatrix3<> * grad_RRT)593 inline void convolveStress(const vector3<int> S, const matrix3<>& GGT, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT)
594 { threadLaunch(convolveStress_thread, S[0]*S[1]*(S[2]/2+1), S, GGT, w, X, Y, grad_RRT);
595 }
596 #ifdef GPU_ENABLED //implemented in Operators.cu
597 void convolveStress_gpu(const vector3<int> S, const matrix3<>& GGT, const RadialFunctionG& w, const complex* X, const complex* Y, symmetricMatrix3<>* grad_RRT);
598 #endif
convolveStress(const RadialFunctionG & w,const ScalarFieldTilde & X,const ScalarFieldTilde & Y)599 matrix3<> convolveStress(const RadialFunctionG& w, const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
600 { const GridInfo& gInfo = X->gInfo;
601 ManagedArray<symmetricMatrix3<>> result; result.init(gInfo.nG, isGpuEnabled());
602 callPref(convolveStress)(gInfo.S, gInfo.GGT, w, X->dataPref(), Y->dataPref(), result.dataPref());
603 matrix3<> resultSum = callPref(eblas_sum)(gInfo.nG, result.dataPref());
604 return gInfo.detR * (gInfo.GT * resultSum * gInfo.G);
605 }
606
607 //------------------------------ Nonlinear Unary operators ------------------------------
608
exp_sub(size_t i,double * X,double prefac)609 void exp_sub(size_t i, double* X, double prefac) { X[i] = exp(prefac*X[i]); }
610 #ifdef GPU_ENABLED
611 void exp_gpu(int N, double* X, double prefac); //in operators.cu
612 #endif
exp(ScalarField && X)613 ScalarField exp(ScalarField&& X)
614 {
615 #ifdef GPU_ENABLED
616 exp_gpu(X->nElem, X->dataGpu(false), X->scale);
617 #else
618 threadedLoop(exp_sub, X->nElem, X->data(false), X->scale);
619 #endif
620 X->scale = 1.0;
621 return X;
622 }
exp(const ScalarField & X)623 ScalarField exp(const ScalarField& X) { return exp(X->clone()); }
624
log_sub(size_t i,double * X,double prefac)625 void log_sub(size_t i, double* X, double prefac) { X[i] = log(prefac*X[i]); }
626 #ifdef GPU_ENABLED
627 void log_gpu(int N, double* X, double prefac); //in operators.cu
628 #endif
log(ScalarField && X)629 ScalarField log(ScalarField&& X)
630 {
631 #ifdef GPU_ENABLED
632 log_gpu(X->nElem, X->dataGpu(false), X->scale);
633 #else
634 threadedLoop(log_sub, X->nElem, X->data(false), X->scale);
635 #endif
636 X->scale = 1.0;
637 return X;
638 }
log(const ScalarField & X)639 ScalarField log(const ScalarField& X) { return log(X->clone()); }
640
641
sqrt_sub(size_t i,double * X,double prefac)642 void sqrt_sub(size_t i, double* X, double prefac) { X[i] = sqrt(prefac*X[i]); }
643 #ifdef GPU_ENABLED
644 void sqrt_gpu(int N, double* X, double prefac); //in operators.cu
645 #endif
sqrt(ScalarField && X)646 ScalarField sqrt(ScalarField&& X)
647 {
648 #ifdef GPU_ENABLED
649 sqrt_gpu(X->nElem, X->dataGpu(false), X->scale);
650 #else
651 threadedLoop(sqrt_sub, X->nElem, X->data(false), X->scale);
652 #endif
653 X->scale = 1.0;
654 return X;
655 }
sqrt(const ScalarField & X)656 ScalarField sqrt(const ScalarField& X) { return sqrt(X->clone()); }
657
658
inv_sub(size_t i,double * X,double prefac)659 void inv_sub(size_t i, double* X, double prefac) { X[i] = prefac/X[i]; }
660 #ifdef GPU_ENABLED
661 void inv_gpu(int N, double* X, double prefac); //in operators.cu
662 #endif
inv(ScalarField && X)663 ScalarField inv(ScalarField&& X)
664 {
665 #ifdef GPU_ENABLED
666 inv_gpu(X->nElem, X->dataGpu(false), 1.0/X->scale);
667 #else
668 threadedLoop(inv_sub, X->nElem, X->data(false), 1.0/X->scale);
669 #endif
670 X->scale = 1.0;
671 return X;
672 }
inv(const ScalarField & X)673 ScalarField inv(const ScalarField& X) { return inv(X->clone()); }
674
pow_sub(size_t i,double * X,double scale,double alpha)675 void pow_sub(size_t i, double* X, double scale, double alpha) { X[i] = pow(scale*X[i],alpha); }
676 #ifdef GPU_ENABLED
677 void pow_gpu(int N, double* X, double scale, double alpha); //in operators.cu
678 #endif
pow(ScalarField && X,double alpha)679 ScalarField pow(ScalarField&& X, double alpha)
680 {
681 #ifdef GPU_ENABLED
682 pow_gpu(X->nElem, X->dataGpu(false), X->scale, alpha);
683 #else
684 threadedLoop(pow_sub, X->nElem, X->data(false), X->scale, alpha);
685 #endif
686 X->scale = 1.0;
687 return X;
688 }
pow(const ScalarField & X,double alpha)689 ScalarField pow(const ScalarField& X, double alpha) { return pow(X->clone(), alpha); }
690
691
692 //------------------------------ Multiplication operators------------------------------
693
operator *=(ScalarField & in,const ScalarField & other)694 ScalarField& operator*=(ScalarField& in, const ScalarField& other)
695 { in->scale *= other->scale;
696 callPref(eblas_dmul)(in->nElem, other->dataPref(false), 1, in->dataPref(false), 1);
697 return in;
698 }
699
operator *=(complexScalarField & in,const ScalarField & other)700 complexScalarField& operator*=(complexScalarField& in, const ScalarField& other)
701 { in->scale *= other->scale;
702 callPref(eblas_zmuld)(in->nElem, other->dataPref(false), 1, in->dataPref(false), 1);
703 return in;
704 }
operator *(const complexScalarField & inC,const ScalarField & inR)705 complexScalarField operator*(const complexScalarField& inC, const ScalarField& inR) { complexScalarField out(inC->clone()); return out *= inR; }
operator *(const ScalarField & inR,const complexScalarField & inC)706 complexScalarField operator*(const ScalarField& inR, const complexScalarField& inC) { complexScalarField out(inC->clone()); return out *= inR; }
operator *(complexScalarField && inC,const ScalarField & inR)707 complexScalarField operator*(complexScalarField&& inC, const ScalarField& inR) { return inC *= inR; }
operator *(const ScalarField & inR,complexScalarField && inC)708 complexScalarField operator*(const ScalarField& inR, complexScalarField&& inC) { return inC *= inR; }
709
operator *=(ScalarFieldTilde & inG,const RealKernel & inR)710 ScalarFieldTilde& operator*=(ScalarFieldTilde& inG, const RealKernel& inR)
711 { callPref(eblas_zmuld)(inG->nElem, inR.dataPref(), 1, inG->dataPref(false), 1);
712 return inG;
713 }
operator *(const RealKernel & inR,const ScalarFieldTilde & inG)714 ScalarFieldTilde operator*(const RealKernel& inR, const ScalarFieldTilde& inG) { ScalarFieldTilde out(inG->clone()); return out *= inR; }
operator *(const ScalarFieldTilde & inG,const RealKernel & inR)715 ScalarFieldTilde operator*(const ScalarFieldTilde& inG, const RealKernel& inR) { ScalarFieldTilde out(inG->clone()); return out *= inR; }
operator *(const RealKernel & inR,ScalarFieldTilde && inG)716 ScalarFieldTilde operator*(const RealKernel& inR, ScalarFieldTilde&& inG) { return inG *= inR; }
operator *(ScalarFieldTilde && inG,const RealKernel & inR)717 ScalarFieldTilde operator*(ScalarFieldTilde&& inG, const RealKernel& inR) { return inG *= inR; }
718
719
720 //------------------------------ Linear combine operators ------------------------------
721
axpy(double alpha,const ScalarField & X,ScalarField & Y)722 void axpy(double alpha, const ScalarField& X, ScalarField& Y)
723 { if(X)
724 { if(Y)
725 { if(Y->scale == 0.0) { Y = X * alpha; }
726 else callPref(eblas_daxpy)(X->nElem, alpha*X->scale/Y->scale, X->dataPref(false), 1, Y->dataPref(false), 1);
727 }
728 else Y = X * alpha;
729 }
730 //if X is null, nothing needs to be done, Y remains unchanged
731 }
operator +=(ScalarField & in,double scalar)732 ScalarField& operator+=(ScalarField& in, double scalar)
733 { ManagedArray<double> dataScalar(&scalar, 1); //Managed array holding scalar
734 callPref(eblas_daxpy)(in->nElem, 1.0, dataScalar.dataPref(), 0, in->dataPref(), 1);
735 return in;
736 }
operator +(double scalar,const ScalarField & in)737 ScalarField operator+(double scalar, const ScalarField& in) { ScalarField out(in->clone()); return out += scalar; }
operator +(const ScalarField & in,double scalar)738 ScalarField operator+(const ScalarField& in, double scalar) { ScalarField out(in->clone()); return out += scalar; }
operator +(double scalar,ScalarField && in)739 ScalarField operator+(double scalar, ScalarField&& in) { return in += scalar; }
operator +(ScalarField && in,double scalar)740 ScalarField operator+(ScalarField&& in, double scalar) { return in += scalar; }
operator -=(ScalarField & in,double scalar)741 ScalarField& operator-=(ScalarField& in, double scalar)
742 { return (in += -scalar);
743 }
operator -(double scalar,const ScalarField & in)744 ScalarField operator-(double scalar, const ScalarField& in) { ScalarField out(in->clone()); return (out *= -1.0) += scalar; }
operator -(const ScalarField & in,double scalar)745 ScalarField operator-(const ScalarField& in, double scalar) { ScalarField out(in->clone()); return out -= scalar; }
operator -(double scalar,ScalarField && in)746 ScalarField operator-(double scalar, ScalarField&& in) { return (in *= -1.0) += scalar; }
operator -(ScalarField && in,double scalar)747 ScalarField operator-(ScalarField&& in, double scalar) { return in -= scalar; }
748
749
750 //------------------------------ Dot products and 2-norms ------------------------------
751
dot(const ScalarField & X,const ScalarField & Y)752 double dot(const ScalarField& X, const ScalarField& Y)
753 { return X->scale * Y->scale * callPref(eblas_ddot)(X->nElem, X->dataPref(false), 1, Y->dataPref(false), 1);
754 }
755
dot(const ScalarFieldTilde & X,const ScalarFieldTilde & Y)756 double dot(const ScalarFieldTilde& X, const ScalarFieldTilde& Y)
757 { int N = X->nElem;
758 int S2 = X->gInfo.S[2]/2 + 1; //inner dimension
759 int S01 = X->gInfo.S[0] * X->gInfo.S[1]; //number of inner dimension slices
760 complex complexDot = callPref(eblas_zdotc)(N, X->dataPref(false), 1, Y->dataPref(false), 1);
761 complex correction1 = callPref(eblas_zdotc)(S01, X->dataPref(false), S2, Y->dataPref(false), S2);
762 complex correction2 = callPref(eblas_zdotc)(S01, X->dataPref(false)+S2-1, S2, Y->dataPref(false)+S2-1, S2);
763 if(S2==1) correction2=complex(0,0); //because slices 1 and 2 are the same
764 return X->scale * Y->scale * (2.0*complexDot - correction1 - correction2).real();
765 }
766
nrm2(const ScalarField & X)767 double nrm2(const ScalarField& X)
768 { return fabs(X->scale) * callPref(eblas_dnrm2)(X->nElem, X->dataPref(false), 1);
769 }
770
nrm2(const ScalarFieldTilde & X)771 double nrm2(const ScalarFieldTilde& X)
772 { int N = X->nElem;
773 int S2 = X->gInfo.S[2]/2 + 1; //inner dimension
774 int S01 = X->gInfo.S[0] * X->gInfo.S[1]; //number of inner dimension slices
775 double complexNorm = callPref(eblas_dznrm2)(N, X->dataPref(false), 1);
776 double correction1 = callPref(eblas_dznrm2)(S01, X->dataPref(false), S2);
777 double correction2 = callPref(eblas_dznrm2)(S01, X->dataPref(false)+S2-1, S2);
778 if(S2==1) correction2=0; //because slices 1 and 2 are the same
779 return fabs(X->scale) * sqrt(2*pow(complexNorm,2) - pow(correction1,2) - pow(correction2,2));
780 }
781
sum(const ScalarField & X)782 double sum(const ScalarField& X)
783 { ManagedArray<double> dataScale(&X->scale, 1); //Managed array holding X->scale
784 return callPref(eblas_ddot)(X->nElem, X->dataPref(false), 1, dataScale.dataPref(), 0);
785 }
786
sum(const ScalarFieldTilde & X)787 double sum(const ScalarFieldTilde& X)
788 { int N = X->nElem;
789 int S2 = X->gInfo.S[2]/2 + 1; //inner dimension
790 int S01 = X->gInfo.S[0] * X->gInfo.S[1]; //number of inner dimension slices
791 ManagedArray<complex> dataOne(std::vector<complex>(1, complex(1.,0.))); //Managed data storing "1"
792 complex complexSum = callPref(eblas_zdotc)(N, X->dataPref(false), 1, dataOne.dataPref(), 0);
793 complex correction1 = callPref(eblas_zdotc)(S01, X->dataPref(false), S2, dataOne.dataPref(), 0);
794 complex correction2 = callPref(eblas_zdotc)(S01, X->dataPref(false)+S2-1, S2, dataOne.dataPref(), 0);
795 if(S2==1) correction2 = 0; //because slices 1 and 2 are the same
796 return X->scale * (2.0*complexSum - correction1 - correction2).real();
797 }
798
799
800
integral(const ScalarField & X)801 double integral(const ScalarField& X)
802 { return X->gInfo.dV * sum(X);
803 }
integral(const ScalarFieldTilde & X)804 double integral(const ScalarFieldTilde& X)
805 { double XdataZero;
806 #ifdef GPU_ENABLED
807 cudaMemcpy(&XdataZero, X->dataGpu(false), sizeof(double), cudaMemcpyDeviceToHost);
808 #else
809 XdataZero = X->data(false)[0].real();
810 #endif
811 return XdataZero * X->gInfo.detR * X->scale;
812 }
integral(const complexScalarField & X)813 complex integral(const complexScalarField& X)
814 { return X->gInfo.dV * sum(X);
815 }
integral(const complexScalarFieldTilde & X)816 complex integral(const complexScalarFieldTilde& X)
817 { complex XdataZero;
818 #ifdef GPU_ENABLED
819 cudaMemcpy(&XdataZero, X->dataGpu(false), sizeof(complex), cudaMemcpyDeviceToHost);
820 #else
821 XdataZero = X->data(false)[0];
822 #endif
823 return XdataZero * X->gInfo.detR * X->scale;
824 }
825
826 //------------------------------ Grid conversion utilities ------------------------------
827
changeGrid_sub(size_t iStart,size_t iStop,const vector3<int> & S,const vector3<int> & Sin,const vector3<int> & Sout,const complex * in,complex * out)828 void changeGrid_sub(size_t iStart, size_t iStop, const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
829 { THREAD_halfGspaceLoop( changeGrid_calc(iG, Sin, Sout, in, out); )
830 }
changeGrid(const vector3<int> & S,const vector3<int> & Sin,const vector3<int> & Sout,const complex * in,complex * out)831 void changeGrid(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
832 { threadLaunch(changeGrid_sub, S[0]*S[1]*(1+S[2]/2), S, Sin, Sout, in, out);
833 }
834 #ifdef GPU_ENABLED
835 void changeGrid_gpu(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out);
836 #endif
changeGrid(const ScalarFieldTilde & in,const GridInfo & gInfoNew)837 ScalarFieldTilde changeGrid(const ScalarFieldTilde& in, const GridInfo& gInfoNew)
838 { static StopWatch watch("changeGrid"); watch.start();
839 ScalarFieldTilde out; nullToZero(out, gInfoNew);
840 assert(gInfoNew.R == in->gInfo.R);
841 const vector3<int>& Sin = in->gInfo.S;
842 const vector3<int>& Sout = gInfoNew.S;
843 vector3<int> Smax; for(int k=0; k<3; k++) Smax[k] = std::max(Sin[k],Sout[k]);
844 callPref(changeGrid)(Smax, Sin, Sout, in->dataPref(), out->dataPref());
845 watch.stop();
846 return out;
847 }
848
changeGrid(const ScalarField & in,const GridInfo & gInfoNew)849 ScalarField changeGrid(const ScalarField& in, const GridInfo& gInfoNew)
850 { return I(changeGrid(J(in), gInfoNew));
851 }
852
853
changeGridFull_sub(size_t iStart,size_t iStop,const vector3<int> & S,const vector3<int> & Sin,const vector3<int> & Sout,const complex * in,complex * out)854 void changeGridFull_sub(size_t iStart, size_t iStop, const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
855 { THREAD_fullGspaceLoop( changeGridFull_calc(iG, Sin, Sout, in, out); )
856 }
changeGridFull(const vector3<int> & S,const vector3<int> & Sin,const vector3<int> & Sout,const complex * in,complex * out)857 void changeGridFull(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out)
858 { threadLaunch(changeGridFull_sub, S[0]*S[1]*S[2], S, Sin, Sout, in, out);
859 }
860 #ifdef GPU_ENABLED
861 void changeGridFull_gpu(const vector3<int>& S, const vector3<int>& Sin, const vector3<int>& Sout, const complex* in, complex* out);
862 #endif
changeGrid(const complexScalarFieldTilde & in,const GridInfo & gInfoNew)863 complexScalarFieldTilde changeGrid(const complexScalarFieldTilde& in, const GridInfo& gInfoNew)
864 { static StopWatch watch("changeGridFull"); watch.start();
865 complexScalarFieldTilde out; nullToZero(out, gInfoNew);
866 assert(gInfoNew.R == in->gInfo.R);
867 const vector3<int>& Sin = in->gInfo.S;
868 const vector3<int>& Sout = gInfoNew.S;
869 vector3<int> Smax; for(int k=0; k<3; k++) Smax[k] = std::max(Sin[k],Sout[k]);
870 callPref(changeGridFull)(Smax, Sin, Sout, in->dataPref(), out->dataPref());
871 watch.stop();
872 return out;
873 }
874
changeGrid(const complexScalarField & in,const GridInfo & gInfoNew)875 complexScalarField changeGrid(const complexScalarField& in, const GridInfo& gInfoNew)
876 { return I(changeGrid(J(in), gInfoNew));
877 }
878
879
880 //------------------------------ Initialization utilities ------------------------------
881
initRandom(ScalarField & X,double cap)882 void initRandom(ScalarField& X, double cap)
883 { double* Xdata = X->data();
884 for(int i=0; i<X->nElem; i++)
885 Xdata[i] = Random::normal(0, 1, cap);
886 }
887
initRandomFlat(ScalarField & X)888 void initRandomFlat(ScalarField& X)
889 { double* Xdata = X->data();
890 for(int i=0; i<X->nElem; i++)
891 Xdata[i] = Random::uniform();
892 }
893
initGaussianKernel_sub(int i,double Gsq,double expfac,double * X)894 void initGaussianKernel_sub(int i, double Gsq, double expfac, double* X)
895 { X[i] = exp(expfac*Gsq);
896 }
initGaussianKernel(RealKernel & X,double x0)897 void initGaussianKernel(RealKernel& X, double x0)
898 { applyFuncGsq(X.gInfo, initGaussianKernel_sub, -pow(0.5*x0,2), X.data());
899 }
900
initTranslation_sub(size_t iStart,size_t iStop,const vector3<int> S,const vector3<> Gr,complex * X)901 void initTranslation_sub(size_t iStart, size_t iStop, const vector3<int> S, const vector3<> Gr, complex* X)
902 { THREAD_halfGspaceLoop( X[i] = cis(-dot(iG,Gr)); )
903 }
initTranslation(ScalarFieldTilde & X,const vector3<> & r)904 void initTranslation(ScalarFieldTilde& X, const vector3<>& r)
905 { const GridInfo& gInfo = X->gInfo;
906 threadLaunch(initTranslation_sub, gInfo.nG, gInfo.S, gInfo.G*r, X->data());
907 }
908
909
gaussConvolve_sub(size_t iStart,size_t iStop,const vector3<int> & S,const matrix3<> & GGT,complex * data,double sigma)910 void gaussConvolve_sub(size_t iStart, size_t iStop, const vector3<int>& S, const matrix3<>& GGT, complex* data, double sigma)
911 { THREAD_halfGspaceLoop( data[i] *= exp(-0.5*sigma*sigma*GGT.metric_length_squared(iG)); )
912 }
gaussConvolve(const vector3<int> & S,const matrix3<> & GGT,complex * data,double sigma)913 void gaussConvolve(const vector3<int>& S, const matrix3<>& GGT, complex* data, double sigma)
914 { threadLaunch(gaussConvolve_sub, S[0]*S[1]*(1+S[2]/2), S, GGT, data, sigma);
915 }
916 #ifdef GPU_ENABLED
917 void gaussConvolve_gpu(const vector3<int>& S, const matrix3<>& GGT, complex* data, double sigma);
918 #endif
gaussConvolve(ScalarFieldTilde && in,double sigma)919 ScalarFieldTilde gaussConvolve(ScalarFieldTilde&& in, double sigma)
920 { assert(in);
921 callPref(gaussConvolve)(in->gInfo.S, in->gInfo.GGT, in->dataPref(false), sigma);
922 return in;
923 }
gaussConvolve(const ScalarFieldTilde & in,double sigma)924 ScalarFieldTilde gaussConvolve(const ScalarFieldTilde& in, double sigma)
925 { ScalarFieldTilde out(in->clone());
926 return gaussConvolve((ScalarFieldTilde&&)out, sigma);
927 }
928
929 //------------------------------ Debug utilities ------------------------------
930
printStats(const ScalarField & X,const char * name,FILE * fp)931 void printStats(const ScalarField& X, const char* name, FILE* fp)
932 { int N = X->nElem;
933 double mean = sum(X)/N;
934 double stdDev = sqrt(fabs(dot(X,X)/N - mean*mean));
935 double minVal, maxVal; callPref(eblas_capMinMax)(N, X->dataPref(), minVal, maxVal);
936 fprintf(fp, "vector %s\t= %.15le +/- %.15le min: %le max: %le\n", name, mean, stdDev, minVal, maxVal);
937 }
938
939 //------------------------------ From VectorField.h ------------------------------
940
gradient_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> G,const complex * Xtilde,vector3<complex * > gradTilde)941 inline void gradient_sub(size_t iStart, size_t iStop, const vector3<int> S,
942 const matrix3<> G, const complex* Xtilde, vector3<complex*> gradTilde)
943 { THREAD_halfGspaceLoop( gradient_calc(i, iG, IS_NYQUIST, G, Xtilde, gradTilde); )
944 }
945 #ifdef GPU_ENABLED //implemented in Operators.cu
946 void gradient_gpu(const vector3<int> S, const matrix3<> G, const complex* Xtilde, vector3<complex*> gradTilde);
947 #endif
gradient(const ScalarFieldTilde & Xtilde)948 VectorFieldTilde gradient(const ScalarFieldTilde& Xtilde)
949 { const GridInfo& gInfo = Xtilde->gInfo;
950 VectorFieldTilde gradTilde(gInfo, isGpuEnabled());
951 #ifdef GPU_ENABLED
952 gradient_gpu(gInfo.S, gInfo.G, Xtilde->dataGpu(), gradTilde.dataGpu());
953 #else
954 threadLaunch(gradient_sub, gInfo.nG, gInfo.S, gInfo.G, Xtilde->data(), gradTilde.data());
955 #endif
956 return gradTilde;
957 }
gradient(const ScalarField & X)958 VectorField gradient(const ScalarField& X) { return I(gradient(J(X))); }
959
960
divergence_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> G,vector3<const complex * > Vtilde,complex * divTilde)961 inline void divergence_sub(size_t iStart, size_t iStop, const vector3<int> S,
962 const matrix3<> G, vector3<const complex*> Vtilde, complex* divTilde)
963 { THREAD_halfGspaceLoop( divergence_calc(i, iG, IS_NYQUIST, G, Vtilde, divTilde); )
964 }
965 #ifdef GPU_ENABLED
966 void divergence_gpu(const vector3<int> S, const matrix3<> G, vector3<const complex*> Vtilde, complex* divTilde);
967 #endif
divergence(const VectorFieldTilde & Vtilde)968 ScalarFieldTilde divergence(const VectorFieldTilde& Vtilde)
969 { const GridInfo& gInfo = Vtilde[0]->gInfo;
970 ScalarFieldTilde divTilde(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
971 #ifdef GPU_ENABLED
972 divergence_gpu(gInfo.S, gInfo.G, Vtilde.dataGpu(), divTilde->dataGpu());
973 #else
974 threadLaunch(divergence_sub, gInfo.nG, gInfo.S, gInfo.G, Vtilde.data(), divTilde->data());
975 #endif
976 return divTilde;
977 }
divergence(const VectorField & V)978 ScalarField divergence(const VectorField& V) { return I(divergence(J(V))); }
979
980
tensorGradient_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> G,const complex * Xtilde,tensor3<complex * > gradTilde)981 inline void tensorGradient_sub(size_t iStart, size_t iStop, const vector3<int> S,
982 const matrix3<> G, const complex* Xtilde, tensor3<complex*> gradTilde)
983 { THREAD_halfGspaceLoop( tensorGradient_calc(i, iG, IS_NYQUIST, G, Xtilde, gradTilde); )
984 }
985 #ifdef GPU_ENABLED //implemented in Operators.cu
986 void tensorGradient_gpu(const vector3<int> S, const matrix3<> G, const complex* Xtilde, tensor3<complex*> gradTilde);
987 #endif
tensorGradient(const ScalarFieldTilde & Xtilde)988 TensorFieldTilde tensorGradient(const ScalarFieldTilde& Xtilde)
989 { const GridInfo& gInfo = Xtilde->gInfo;
990 TensorFieldTilde gradTilde(gInfo, isGpuEnabled());
991 #ifdef GPU_ENABLED
992 tensorGradient_gpu(gInfo.S, gInfo.G, Xtilde->dataGpu(), gradTilde.dataGpu());
993 #else
994 threadLaunch(tensorGradient_sub, gInfo.nG, gInfo.S, gInfo.G, Xtilde->data(), gradTilde.data());
995 #endif
996 return gradTilde;
997 }
998
999
tensorDivergence_sub(size_t iStart,size_t iStop,const vector3<int> S,const matrix3<> G,tensor3<const complex * > Vtilde,complex * divTilde)1000 inline void tensorDivergence_sub(size_t iStart, size_t iStop, const vector3<int> S,
1001 const matrix3<> G, tensor3<const complex*> Vtilde, complex* divTilde)
1002 { THREAD_halfGspaceLoop( tensorDivergence_calc(i, iG, IS_NYQUIST, G, Vtilde, divTilde); )
1003 }
1004 #ifdef GPU_ENABLED
1005 void tensorDivergence_gpu(const vector3<int> S, const matrix3<> G, tensor3<const complex*> Vtilde, complex* divTilde);
1006 #endif
tensorDivergence(const TensorFieldTilde & Vtilde)1007 ScalarFieldTilde tensorDivergence(const TensorFieldTilde& Vtilde)
1008 { const GridInfo& gInfo = Vtilde[0]->gInfo;
1009 ScalarFieldTilde divTilde(ScalarFieldTildeData::alloc(gInfo, isGpuEnabled()));
1010 #ifdef GPU_ENABLED
1011 tensorDivergence_gpu(gInfo.S, gInfo.G, Vtilde.dataGpu(), divTilde->dataGpu());
1012 #else
1013 threadLaunch(tensorDivergence_sub, gInfo.nG, gInfo.S, gInfo.G, Vtilde.data(), divTilde->data());
1014 #endif
1015 return divTilde;
1016 }
1017
1018