1 //////////////////////////////////////////////////////////////////
2 // (c) Copyright 2003- by Ken Esler and Jeongnim Kim //
3 //////////////////////////////////////////////////////////////////
4 // National Center for Supercomputing Applications & //
5 // Materials Computation Center //
6 // University of Illinois, Urbana-Champaign //
7 // Urbana, IL 61801 //
8 // e-mail: jnkim@ncsa.uiuc.edu //
9 // //
10 // Supported by //
11 // National Center for Supercomputing Applications, UIUC //
12 // Materials Computation Center, UIUC //
13 //////////////////////////////////////////////////////////////////
14 /** helper functions for EinsplineSetBuilder
15 */
16 #ifndef QMCPLUSPLUS_EINSPLINEBUILDER_HELPER_H
17 #define QMCPLUSPLUS_EINSPLINEBUILDER_HELPER_H
18 #include <complex>
19 #include "OhmmsPETE/TinyVector.h"
20 #include "OhmmsPETE/OhmmsVector.h"
21 #include "OhmmsPETE/OhmmsArray.h"
22 #include "mpi/collectives.h"
23 #include "CPU/SIMD/simd.hpp"
24 #include "config/stdlib/math.hpp"
25
26 namespace qmcplusplus
27 {
28 /** unpack packed cG to fftbox
29 * @param cG packed vector
30 * @param gvecs g-coordinate for cG[i]
31 * @param maxg fft grid
32 * @param fftbox unpacked data to be transformed
33 */
34 template<typename T>
unpack4fftw(const Vector<std::complex<T>> & cG,const std::vector<TinyVector<int,3>> & gvecs,const TinyVector<int,3> & maxg,Array<std::complex<T>,3> & fftbox)35 inline void unpack4fftw(const Vector<std::complex<T>>& cG,
36 const std::vector<TinyVector<int, 3>>& gvecs,
37 const TinyVector<int, 3>& maxg,
38 Array<std::complex<T>, 3>& fftbox)
39 {
40 fftbox = std::complex<T>();
41 const int upper_bound[3] = {(maxg[0] - 1) / 2, (maxg[1] - 1) / 2, (maxg[2] - 1) / 2};
42 const int lower_bound[3] = {upper_bound[0] - maxg[0] + 1, upper_bound[1] - maxg[1] + 1, upper_bound[2] - maxg[2] + 1};
43 //only coefficient indices between [lower_bound,upper_bound] are taken for FFT.
44 //this is rather unsafe
45 //#pragma omp parallel for
46 for (int iG = 0; iG < cG.size(); iG++)
47 {
48 if (gvecs[iG][0] > upper_bound[0] || gvecs[iG][0] < lower_bound[0] || gvecs[iG][1] > upper_bound[1] ||
49 gvecs[iG][1] < lower_bound[1] || gvecs[iG][2] > upper_bound[2] || gvecs[iG][2] < lower_bound[2])
50 {
51 //std::cout << "Warning: cG out of bound "
52 // << "x " << gvecs[iG][0] << " y " << gvecs[iG][1] << " z " << gvecs[iG][2] << std::endl
53 // << "xu " << upper_bound[0] << " yu " << upper_bound[1] << " zu " << upper_bound[2] << std::endl
54 // << "xd " << lower_bound[0] << " yd " << lower_bound[1] << " zd " << lower_bound[2] << std::endl;
55 continue;
56 }
57 fftbox((gvecs[iG][0] + maxg[0]) % maxg[0], (gvecs[iG][1] + maxg[1]) % maxg[1], (gvecs[iG][2] + maxg[2]) % maxg[2]) =
58 cG[iG];
59 }
60 }
61
62 /** fix phase
63 * @param in input complex data
64 * @param out output real data
65 * @param twist vector to correct phase
66 */
67 template<typename T, typename T1, typename T2>
fix_phase_c2r(const Array<std::complex<T>,3> & in,Array<T1,3> & out,const TinyVector<T2,3> & twist)68 inline void fix_phase_c2r(const Array<std::complex<T>, 3>& in, Array<T1, 3>& out, const TinyVector<T2, 3>& twist)
69 {
70 const T1 two_pi = -2.0 * M_PI;
71 const int nx = in.size(0);
72 const int ny = in.size(1);
73 const int nz = in.size(2);
74 T1 nx_i = static_cast<T1>(twist[0]) / static_cast<T1>(nx);
75 T1 ny_i = static_cast<T1>(twist[1]) / static_cast<T1>(ny);
76 T1 nz_i = static_cast<T1>(twist[2]) / static_cast<T1>(nz);
77
78 #pragma omp parallel for firstprivate(nx_i, ny_i, nz_i)
79 for (int ix = 0; ix < nx; ix++)
80 {
81 T1 s, c;
82 const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
83 T1* restrict out_ptr = out.data() + ix * ny * nz;
84
85 T1 rux = static_cast<T1>(ix) * nx_i;
86 for (int iy = 0; iy < ny; iy++)
87 {
88 T1 ruy = static_cast<T1>(iy) * ny_i;
89 for (int iz = 0; iz < nz; iz++)
90 {
91 T1 ruz = static_cast<T1>(iz) * nz_i;
92 qmcplusplus::sincos(two_pi * (rux + ruy + ruz), &s, &c);
93 *out_ptr = static_cast<T1>(c * in_ptr->real() - s * in_ptr->imag());
94 ++out_ptr;
95 ++in_ptr;
96 }
97 }
98 }
99 //#pragma omp parallel for
100 // for (int ix=0; ix<nx; ix++) {
101 // PosType ru;
102 // ru[0] = (RealType)ix / (RealType)nx;
103 // for (int iy=0; iy<ny; iy++) {
104 // ru[1] = (RealType)iy / (RealType)ny;
105 // for (int iz=0; iz<nz; iz++) {
106 // ru[2] = (RealType)iz / (RealType)nz;
107 // double phi = -2.0*M_PI*dot (ru, TwistAngles[ti]);
108 // double s, c;
109 // qmcplusplus::sincos(phi, &s, &c);
110 // std::complex<double> phase(c,s);
111 // std::complex<double> z = phase*rawData(ix,iy,iz);
112 // splineData(ix,iy,iz) = z.real();
113 // }
114 // }
115 // }
116 }
117
118
119 /** rotate the state after 3dfft
120 *
121 * First, add the eikr phase factor.
122 * Then, rotate the phase of the orbitals so that neither
123 * the real part nor the imaginary part are very near
124 * zero. This sometimes happens in crystals with high
125 * symmetry at special k-points.
126 */
127 template<typename T, typename T1, typename T2>
fix_phase_rotate_c2r(Array<std::complex<T>,3> & in,Array<T1,3> & out,const TinyVector<T2,3> & twist,T & phase_r,T & phase_i)128 inline void fix_phase_rotate_c2r(Array<std::complex<T>, 3>& in,
129 Array<T1, 3>& out,
130 const TinyVector<T2, 3>& twist,
131 T& phase_r,
132 T& phase_i)
133 {
134 const T two_pi = -2.0 * M_PI;
135 const int nx = in.size(0);
136 const int ny = in.size(1);
137 const int nz = in.size(2);
138 T nx_i = 1.0 / static_cast<T>(nx);
139 T ny_i = 1.0 / static_cast<T>(ny);
140 T nz_i = 1.0 / static_cast<T>(nz);
141
142 T rNorm = 0.0, iNorm = 0.0, riNorm = 0.0;
143 #pragma omp parallel for reduction(+ : rNorm, iNorm, riNorm)
144 for (int ix = 0; ix < nx; ix++)
145 {
146 T s, c;
147 std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
148 T rux = static_cast<T>(ix) * nx_i * twist[0];
149 for (int iy = 0; iy < ny; iy++)
150 {
151 T ruy = static_cast<T>(iy) * ny_i * twist[1];
152 for (int iz = 0; iz < nz; iz++)
153 {
154 T ruz = static_cast<T>(iz) * nz_i * twist[2];
155 qmcplusplus::sincos(two_pi * (rux + ruy + ruz), &s, &c);
156 std::complex<T> eikr(c, s);
157 *in_ptr *= eikr;
158 rNorm += in_ptr->real() * in_ptr->real();
159 iNorm += in_ptr->imag() * in_ptr->imag();
160 riNorm += in_ptr->real() * in_ptr->imag();
161 ++in_ptr;
162 }
163 }
164 }
165
166 const T x = (rNorm - iNorm) / riNorm;
167 const T y = 1.0 / std::sqrt(x * x + 4.0);
168 const T phs = std::sqrt(0.5 - y);
169 phase_r = phs;
170 phase_i = (x < 0) ? std::sqrt(1.0 - phs * phs) : -std::sqrt(1.0 - phs * phs);
171
172 #pragma omp parallel for
173 for (int ix = 0; ix < nx; ix++)
174 {
175 const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
176 T1* restrict out_ptr = out.data() + ix * ny * nz;
177 for (int iy = 0; iy < ny; iy++)
178 for (int iz = 0; iz < nz; iz++)
179 {
180 *out_ptr = static_cast<T1>(phase_r * in_ptr->real() - phase_i * in_ptr->imag());
181 ++in_ptr;
182 ++out_ptr;
183 }
184 }
185 }
186
187 template<typename T, typename T1, typename T2>
fix_phase_rotate_c2c(const Array<std::complex<T>,3> & in,Array<std::complex<T1>,3> & out,const TinyVector<T2,3> & twist)188 inline void fix_phase_rotate_c2c(const Array<std::complex<T>, 3>& in,
189 Array<std::complex<T1>, 3>& out,
190 const TinyVector<T2, 3>& twist)
191 {
192 const int nx = in.size(0);
193 const int ny = in.size(1);
194 const int nz = in.size(2);
195 T phase_r, phase_i;
196
197 compute_phase(in, twist, phase_r, phase_i);
198
199 #pragma omp parallel for
200 for (int ix = 0; ix < nx; ++ix)
201 {
202 const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
203 std::complex<T1>* restrict out_ptr = out.data() + ix * ny * nz;
204 for (int iy = 0; iy < ny; ++iy)
205 for (int iz = 0; iz < nz; ++iz)
206 {
207 *out_ptr = std::complex<T1>(static_cast<T1>(phase_r * in_ptr->real() - phase_i * in_ptr->imag()),
208 static_cast<T1>(phase_i * in_ptr->real() + phase_r * in_ptr->imag()));
209 ++out_ptr;
210 ++in_ptr;
211 }
212 }
213 }
214
215 template<typename T, typename T1, typename T2>
fix_phase_rotate_c2c(const Array<std::complex<T>,3> & in,Array<T1,3> & out_r,Array<T1,3> & out_i,const TinyVector<T2,3> & twist,T & phase_r,T & phase_i)216 inline void fix_phase_rotate_c2c(const Array<std::complex<T>, 3>& in,
217 Array<T1, 3>& out_r,
218 Array<T1, 3>& out_i,
219 const TinyVector<T2, 3>& twist,
220 T& phase_r,
221 T& phase_i)
222 {
223 const int nx = in.size(0);
224 const int ny = in.size(1);
225 const int nz = in.size(2);
226
227 compute_phase(in, twist, phase_r, phase_i);
228
229 #pragma omp parallel for
230 for (size_t ix = 0; ix < nx; ++ix)
231 for (size_t iy = 0; iy < ny; ++iy)
232 {
233 const size_t offset = ix * ny * nz + iy * nz;
234 const std::complex<T>* restrict in_ptr = in.data() + offset;
235 T1* restrict r_ptr = out_r.data() + offset;
236 T1* restrict i_ptr = out_i.data() + offset;
237 for (size_t iz = 0; iz < nz; ++iz)
238 {
239 r_ptr[iz] = static_cast<T1>(phase_r * in_ptr[iz].real() - phase_i * in_ptr[iz].imag());
240 i_ptr[iz] = static_cast<T1>(phase_i * in_ptr[iz].real() + phase_r * in_ptr[iz].imag());
241 }
242 }
243 }
244
245 /** Compute the norm of an orbital.
246 * @param cG the plane wave coefficients
247 * @return norm of the orbital
248 */
249 template<typename T>
compute_norm(const Vector<std::complex<T>> & cG)250 inline T compute_norm(const Vector<std::complex<T>>& cG)
251 {
252 T total_norm2(0);
253 #pragma omp parallel for reduction(+ : total_norm2)
254 for (size_t ig = 0; ig < cG.size(); ++ig)
255 total_norm2 += cG[ig].real() * cG[ig].real() + cG[ig].imag() * cG[ig].imag();
256 return std::sqrt(total_norm2);
257 }
258
259 /** Compute the phase factor for rotation. The algorithm aims at balanced real and imaginary parts.
260 * @param in the real space orbital value on a 3D grid
261 * @param twist k-point in reduced coordinates
262 * @param phase_r output real part of the phase
263 * @param phase_i output imaginary part of the phase
264 */
265 template<typename T, typename T2>
compute_phase(const Array<std::complex<T>,3> & in,const TinyVector<T2,3> & twist,T & phase_r,T & phase_i)266 inline void compute_phase(const Array<std::complex<T>, 3>& in, const TinyVector<T2, 3>& twist, T& phase_r, T& phase_i)
267 {
268 const T two_pi = -2.0 * M_PI;
269 const size_t nx = in.size(0);
270 const size_t ny = in.size(1);
271 const size_t nz = in.size(2);
272
273 const T nx_i = 1.0 / static_cast<T>(nx);
274 const T ny_i = 1.0 / static_cast<T>(ny);
275 const T nz_i = 1.0 / static_cast<T>(nz);
276
277 T rNorm = 0.0, iNorm = 0.0, riNorm = 0.0;
278 #pragma omp parallel for reduction(+ : rNorm, iNorm, riNorm)
279 for (size_t ix = 0; ix < nx; ++ix)
280 {
281 for (size_t iy = 0; iy < ny; ++iy)
282 {
283 const T rux = static_cast<T>(ix) * nx_i * twist[0];
284 T s, c;
285 T rsum = 0, isum = 0, risum = 0.0;
286 const T ruy = static_cast<T>(iy) * ny_i * twist[1];
287 const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz + iy * nz;
288 for (size_t iz = 0; iz < nz; ++iz)
289 {
290 const T ruz = static_cast<T>(iz) * nz_i * twist[2];
291 qmcplusplus::sincos(two_pi * (rux + ruy + ruz), &s, &c);
292 const T re = c * in_ptr[iz].real() - s * in_ptr[iz].imag();
293 const T im = s * in_ptr[iz].real() + c * in_ptr[iz].imag();
294 rsum += re * re;
295 isum += im * im;
296 risum += re * im;
297 }
298 rNorm += rsum;
299 iNorm += isum;
300 riNorm += risum;
301 }
302 }
303
304 const T x = (rNorm - iNorm) / riNorm;
305 const T y = 1.0 / std::sqrt(x * x + 4.0);
306 const T phs = std::sqrt(0.5 - y);
307 phase_r = phs;
308 phase_i = (x < 0) ? std::sqrt(1.0 - phs * phs) : -std::sqrt(1.0 - phs * phs);
309 }
310
311 /** rotate the state after 3dfft
312 *
313 */
314 template<typename T>
fix_phase_rotate(const Array<std::complex<T>,3> & e2pi,Array<std::complex<T>,3> & in,Array<T,3> & out)315 inline void fix_phase_rotate(const Array<std::complex<T>, 3>& e2pi, Array<std::complex<T>, 3>& in, Array<T, 3>& out)
316 {
317 const int nx = e2pi.size(0);
318 const int ny = e2pi.size(1);
319 const int nz = e2pi.size(2);
320 T rNorm = 0.0, iNorm = 0.0;
321 //#pragma omp parallel for reduction(+:rNorm,iNorm)
322 for (int ix = 0; ix < nx; ix++)
323 {
324 T rpart = 0.0, ipart = 0.0;
325 const std::complex<T>* restrict p_ptr = e2pi.data() + ix * ny * nz;
326 std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
327 for (int iyz = 0; iyz < ny * nz; ++iyz)
328 {
329 in_ptr[iyz] *= p_ptr[iyz];
330 rpart += in_ptr[iyz].real() * in_ptr[iyz].real();
331 ipart += in_ptr[iyz].imag() * in_ptr[iyz].imag();
332 }
333 rNorm += rpart;
334 iNorm += ipart;
335 }
336
337 //#pragma omp parallel
338 {
339 T arg = std::atan2(iNorm, rNorm);
340 T phase_i, phase_r;
341 qmcplusplus::sincos(0.125 * M_PI - 0.5 * arg, &phase_i, &phase_r);
342 //#pragma omp for
343 for (int ix = 0; ix < nx; ix++)
344 {
345 const std::complex<T>* restrict in_ptr = in.data() + ix * ny * nz;
346 T* restrict out_ptr = out.data() + ix * ny * nz;
347 for (int iyz = 0; iyz < ny * nz; iyz++)
348 out_ptr[iyz] = phase_r * in_ptr[iyz].real() - phase_i * in_ptr[iyz].imag();
349 }
350 }
351 }
352
353 template<typename T>
fix_phase_rotate(const Array<std::complex<T>,3> & e2pi,const Array<std::complex<T>,3> & in,Array<std::complex<T>,3> & out)354 inline void fix_phase_rotate(const Array<std::complex<T>, 3>& e2pi,
355 const Array<std::complex<T>, 3>& in,
356 Array<std::complex<T>, 3>& out)
357 {
358 T rNorm = 0.0, iNorm = 0.0, riNorm = 0.0;
359
360 simd::accumulate_phases(e2pi.size(), e2pi.data(), in.data(), rNorm, iNorm, riNorm);
361
362 T x = (rNorm - iNorm) / riNorm;
363 x = 1.0 / std::sqrt(x * x + 4.0);
364 T phs = (x > 0.5) ? std::sqrt(0.5 + x) : std::sqrt(0.5 - x);
365 std::complex<T> phase_c(phs * phs / (rNorm + iNorm), (1.0 - phs * phs) / (rNorm + iNorm));
366
367 BLAS::axpy(in.size(), phase_c, in.data(), out.data());
368 }
369
370
bcastSortBands(int spin,int n,bool root)371 inline bool EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
372 {
373 update_token(__FILE__, __LINE__, "bcastSortBands");
374
375 std::vector<BandInfo>& SortBands(*FullBands[spin]);
376
377 TinyVector<int, 4> nbands(int(SortBands.size()), n, NumValenceOrbs, NumCoreOrbs);
378 mpi::bcast(*myComm, nbands);
379
380 //buffer to serialize BandInfo
381 PooledData<OHMMS_PRECISION_FULL> misc(nbands[0] * 5);
382 bool isCore = false;
383 n = NumDistinctOrbitals = nbands[1];
384 NumValenceOrbs = nbands[2];
385 NumCoreOrbs = nbands[3];
386
387 if (root)
388 {
389 misc.rewind();
390 //misc.put(NumValenceOrbs);
391 //misc.put(NumCoreOrbs);
392 for (int i = 0; i < n; ++i)
393 {
394 misc.put(SortBands[i].TwistIndex);
395 misc.put(SortBands[i].BandIndex);
396 misc.put(SortBands[i].Energy);
397 misc.put(SortBands[i].MakeTwoCopies);
398 misc.put(SortBands[i].IsCoreState);
399
400 isCore |= SortBands[i].IsCoreState;
401 }
402
403 for (int i = n; i < SortBands.size(); ++i)
404 {
405 misc.put(SortBands[i].TwistIndex);
406 misc.put(SortBands[i].BandIndex);
407 misc.put(SortBands[i].Energy);
408 misc.put(SortBands[i].MakeTwoCopies);
409 misc.put(SortBands[i].IsCoreState);
410 }
411 }
412 myComm->bcast(misc);
413
414 if (!root)
415 {
416 SortBands.resize(nbands[0]);
417 misc.rewind();
418 //misc.get(NumValenceOrbs);
419 //misc.get(NumCoreOrbs);
420 for (int i = 0; i < n; ++i)
421 {
422 misc.get(SortBands[i].TwistIndex);
423 misc.get(SortBands[i].BandIndex);
424 misc.get(SortBands[i].Energy);
425 misc.get(SortBands[i].MakeTwoCopies);
426 misc.get(SortBands[i].IsCoreState);
427
428 isCore |= SortBands[i].IsCoreState;
429 }
430 for (int i = n; i < SortBands.size(); ++i)
431 {
432 misc.get(SortBands[i].TwistIndex);
433 misc.get(SortBands[i].BandIndex);
434 misc.get(SortBands[i].Energy);
435 misc.get(SortBands[i].MakeTwoCopies);
436 misc.get(SortBands[i].IsCoreState);
437 }
438 }
439
440 //char fname[64];
441 //sprintf(fname,"debug.%d",myComm->rank());
442 //ofstream fout(fname);
443 //fout.setf(std::ios::scientific, std::ios::floatfield);
444 //fout.precision(12);
445 //for(int i=0; i<misc.size();++i)
446 // fout << misc[i] << std::endl;
447 return isCore;
448 }
449
450 } // namespace qmcplusplus
451
452 #endif
453