1 /*-------------------------------------------------------------------
2 Copyright 2014 Ravishankar Sundararaman
3
4 This file is part of JDFTx.
5
6 JDFTx is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 JDFTx is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with JDFTx. If not, see <http://www.gnu.org/licenses/>.
18 -------------------------------------------------------------------*/
19
20 #include <wannier/WannierMinimizer.h>
21 #include <electronic/SpeciesInfo_internal.h>
22 #include <core/BlasExtra.h>
23 #include <core/Random.h>
24
init(const WannierMinimizer * wmin)25 void WannierGradient::init(const WannierMinimizer* wmin)
26 { this->wmin = wmin;
27 B1.resize(wmin->kMesh.size());
28 B2.resize(wmin->kMesh.size());
29 }
ikStart() const30 size_t WannierGradient::ikStart() const { return wmin->ikStart; }
ikStop() const31 size_t WannierGradient::ikStop() const { return wmin->ikStop; }
32
33 //---- linear algebra functions required by Minimizable<WannierGradient> -----
34
clone(const WannierGradient & grad)35 WannierGradient clone(const WannierGradient& grad) { return grad; }
dot(const WannierGradient & x,const WannierGradient & y)36 double dot(const WannierGradient& x, const WannierGradient& y)
37 { assert(x.wmin == y.wmin);
38 double result = 0.;
39 for(unsigned ik=x.ikStart(); ik<x.ikStop(); ik++)
40 { if(x.B1[ik]) result += dotc(x.B1[ik], y.B1[ik]).real() * 2.; //off-diagonal blocks of a Hermitian matrix
41 result += dotc(x.B2[ik], y.B2[ik]).real(); //diagonal block; always present
42 }
43 mpiWorld->allReduce(result, MPIUtil::ReduceSum);
44 return result;
45 }
operator *=(WannierGradient & x,double alpha)46 WannierGradient& operator*=(WannierGradient& x, double alpha)
47 { for(unsigned ik=x.ikStart(); ik<x.ikStop(); ik++)
48 { if(x.B1[ik]) x.B1[ik] *= alpha;
49 x.B2[ik] *= alpha;
50 }
51 return x;
52 }
axpy(double alpha,const WannierGradient & x,WannierGradient & y)53 void axpy(double alpha, const WannierGradient& x, WannierGradient& y)
54 { assert(x.wmin == y.wmin);
55 for(unsigned ik=x.ikStart(); ik<x.ikStop(); ik++)
56 { if(x.B1[ik]) axpy(alpha, x.B1[ik], y.B1[ik]);
57 axpy(alpha, x.B2[ik], y.B2[ik]);
58 }
59 }
randomize(WannierGradient & x)60 void randomize(WannierGradient& x)
61 { for(unsigned ik=x.ikStart(); ik<x.ikStop(); ik++)
62 { if(x.B1[ik]) randomize(x.B1[ik]); //off-diagonal block of Hermitian
63 if(x.B2[ik]) { randomize(x.B2[ik]); x.B2[ik] = dagger_symmetrize(x.B2[ik]); } //diagonal block
64 }
65 }
66
67 //---- energy/gradient functions required by Minimizable<WannierGradient> -----
68
step(const WannierGradient & grad,double alpha)69 void WannierMinimizer::step(const WannierGradient& grad, double alpha)
70 { static StopWatch watch("WannierMinimizer::step"); watch.start();
71 assert(grad.wmin == this);
72 for(unsigned ik=ikStart; ik<ikStop; ik++)
73 { KmeshEntry& ki = kMesh[ik];
74 //Stage 1:
75 if(ki.nIn > nCenters)
76 { int nFreeIn = ki.nIn - ki.nFixed;
77 int nFree = nCenters - ki.nFixed;
78 matrix B1 = zeroes(nFreeIn, nFreeIn);
79 B1.set(0,nFree, nFree,nFreeIn, alpha*grad.B1[ik]);
80 B1.set(nFree,nFreeIn, 0,nFree, alpha*dagger(grad.B1[ik]));
81 ki.U1.set(0,nBands, ki.nFixed,ki.nIn, ki.U1(0,nBands, ki.nFixed,ki.nIn) * cis(B1));
82 }
83 //Stage 2:
84 ki.U2.set(nFrozen,nCenters, nFrozen,nCenters, cis(alpha*grad.B2[ik]) * ki.U2(nFrozen,nCenters, nFrozen,nCenters));
85 //Net rotation:
86 ki.U = ki.U1 * ki.U2;
87 }
88 watch.stop();
89 bcastU(); //Make U available on all processes that need it
90 }
91
bcastU()92 void WannierMinimizer::bcastU()
93 { static StopWatch watch("WannierMinimizer::bcastU"); watch.start();
94 std::vector<MPIUtil::Request> requests;
95 for(KmeshEntry& ki: kMesh)
96 if(ki.mpi)
97 { MPIUtil::Request request;
98 ki.mpi->bcastData(ki.U, 0, &request);
99 requests.push_back(request);
100 }
101 MPIUtil::waitAll(requests);
102 watch.stop();
103 }
104
105
compute(WannierGradient * grad,WannierGradient * Kgrad)106 double WannierMinimizer::compute(WannierGradient* grad, WannierGradient* Kgrad)
107 { static StopWatch watch("WannierMinimizer::compute");
108 if(grad) for(KmeshEntry& ki: kMesh) if(ki.U) ki.Omega_UdotU = zeroes(nCenters, ki.nIn); //Clear gradient
109
110 double Omega = getOmega(grad);
111
112 //Collect Omega_U and propagate to Omega_B if necessary:
113 if(grad)
114 { watch.start();
115 std::vector<MPIUtil::Request> requests;
116 for(KmeshEntry& ki: kMesh)
117 if(ki.mpi)
118 { MPIUtil::Request request;
119 ki.mpi->reduceData(ki.Omega_UdotU, MPIUtil::ReduceSum, 0, &request); //Collect Omega_U
120 requests.push_back(request);
121 }
122 MPIUtil::waitAll(requests);
123 grad->init(this);
124 for(size_t ik=ikStart; ik<ikStop; ik++)
125 { KmeshEntry& ki = kMesh[ik];
126 matrix Omega_B = complex(0,0.5)*(ki.U2(0,ki.nIn, 0,nCenters) * ki.Omega_UdotU * dagger(ki.U2));
127 if(ki.nIn > nCenters) //Stage 1:
128 grad->B1[ik] = Omega_B(ki.nFixed,nCenters, nCenters,ki.nIn);
129 matrix Omega_B2_hlf = Omega_B(nFrozen,nCenters, nFrozen,nCenters);
130 grad->B2[ik] = Omega_B2_hlf + dagger(Omega_B2_hlf);
131 }
132 if(Kgrad) *Kgrad = precondition(*grad);
133 watch.stop();
134 }
135 return Omega;
136 }
137
constrain(WannierGradient & grad)138 void WannierMinimizer::constrain(WannierGradient& grad)
139 { for(size_t ik=ikStart; ik<ikStop; ik++)
140 grad.B2[ik] = dagger_symmetrize(grad.B2[ik]);
141 }
142
precondition(const WannierGradient & grad)143 WannierGradient WannierMinimizer::precondition(const WannierGradient& grad)
144 { WannierGradient Kgrad = grad;
145 constrain(Kgrad);
146 return Kgrad;
147 }
148
149
fixUnitary(const matrix & U,bool * isSingular)150 matrix WannierMinimizer::fixUnitary(const matrix& U, bool* isSingular)
151 { return U * invsqrt(dagger(U) * U, 0, 0, isSingular);
152 }
153
report(int iter)154 bool WannierMinimizer::report(int iter)
155 { //Check unitarity:
156 bool needRestart = false;
157 for(size_t ik=ikStart; ik<ikStop; ik++)
158 { KmeshEntry& ki = kMesh[ik];
159 if(nrm2(dagger(ki.U) * ki.U - eye(ki.nIn)) > 1e-6)
160 { needRestart = true;
161 break;
162 }
163 }
164 mpiWorld->allReduce(needRestart, MPIUtil::ReduceLOr);
165 if(needRestart)
166 { logPrintf("%s\tUpdating rotations to enforce unitarity\n", wannier.minParams.linePrefix);
167 ostringstream ossErr;
168 for(size_t ik=ikStart; ik<ikStop; ik++)
169 { KmeshEntry& ki = kMesh[ik];
170 bool isSingular = false;
171 ki.U1 = fixUnitary(ki.U1, &isSingular);
172 ki.U2 = fixUnitary(ki.U2, &isSingular);
173 if(isSingular)
174 { ossErr << "Unitary rotations are singular at k = [ "
175 << ki.point.k[0] << ' ' << ki.point.k[1] << ' ' << ki.point.k[2] << " ]\n";
176 break;
177 }
178 ki.U = ki.U1 * ki.U2;
179 }
180 mpiWorld->checkErrors(ossErr);
181 bcastU();
182 return true;
183 }
184 return false;
185 }
186
sync(double x) const187 double WannierMinimizer::sync(double x) const
188 { mpiWorld->bcast(x);
189 return x;
190 }
191
192 //---------------- kpoint and wavefunction handling -------------------
193
operator <(const WannierMinimizer::Kpoint & other) const194 bool WannierMinimizer::Kpoint::operator<(const WannierMinimizer::Kpoint& other) const
195 { if(iReduced!=other.iReduced) return iReduced<other.iReduced;
196 if(iSym!=other.iSym) return iSym<other.iSym;
197 if(invert!=other.invert) return invert<other.invert;
198 if(!(offset==other.offset)) return offset<other.offset;
199 return false; //all equal
200 }
201
operator ==(const WannierMinimizer::Kpoint & other) const202 bool WannierMinimizer::Kpoint::operator==(const WannierMinimizer::Kpoint& other) const
203 { if(iReduced!=other.iReduced) return false;
204 if(iSym!=other.iSym) return false;
205 if(invert!=other.invert) return false;
206 if(!(offset==other.offset)) return false;
207 return true;
208 }
209
getWfns(const WannierMinimizer::Kpoint & kpoint,int iSpin,std::vector<matrix> * VdagResult) const210 ColumnBundle WannierMinimizer::getWfns(const WannierMinimizer::Kpoint& kpoint, int iSpin, std::vector<matrix>* VdagResult) const
211 { ColumnBundle ret(nBands, basis.nbasis*nSpinor, &basis, &kpoint, isGpuEnabled());
212 ret.zero();
213 axpyWfns(1., matrix(), kpoint, iSpin, ret, VdagResult);
214 return ret;
215 }
216
217 #define axpyWfns_COMMON(result) \
218 /* Pick transform: */ \
219 const ColumnBundleTransform& transform = *(((result.basis==&basisSuper) ? transformMapSuper : transformMap).find(kpoint)->second); \
220 /* Pick source ColumnBundle: */ \
221 int q = kpoint.iReduced + iSpin*qCount; \
222 const ColumnBundle* C = e.eInfo.isMine(q) ? &e.eVars.C[q] : &Cother[q]; \
223 assert(*C);
224
axpyWfns(double alpha,const matrix & A,const WannierMinimizer::Kpoint & kpoint,int iSpin,ColumnBundle & result,std::vector<matrix> * VdagResult) const225 void WannierMinimizer::axpyWfns(double alpha, const matrix& A, const WannierMinimizer::Kpoint& kpoint, int iSpin, ColumnBundle& result, std::vector<matrix>* VdagResult) const
226 { static StopWatch watch("WannierMinimizer::axpyWfns"); watch.start();
227 axpyWfns_COMMON(result)
228 const std::vector<matrix>* VdagC = VdagResult ? (e.eInfo.isMine(q) ? &e.eVars.VdagC[q] : &VdagCother[q]) : 0;
229 //Apply transformation if provided:
230 ColumnBundle Cout; std::vector<matrix> VdagCout;
231 if(A)
232 { matrix Astar = (kpoint.invert<0 ? conj(A) : A);
233 Cout = (*C) * Astar;
234 C = &Cout;
235 //Similarly for projections, if needed:
236 if(VdagResult)
237 { VdagCout.resize(VdagC->size());
238 for(size_t iSp=0; iSp<VdagC->size(); iSp++)
239 if(VdagC->at(iSp))
240 VdagCout[iSp] = VdagC->at(iSp) * Astar;
241 VdagC = &VdagCout;
242 }
243 }
244 //Scatter from reduced basis to common basis with transformations:
245 assert(C->nCols() == result.nCols());
246 transform.scatterAxpy(alpha, *C, result,0,1);
247 //Corresponding transformation in projections:
248 if(VdagResult)
249 *VdagResult = transform.transformVdagC(*VdagC, kpoint.iSym);
250 watch.stop();
251 }
252
axpyWfns_grad(double alpha,matrix & Omega_A,const WannierMinimizer::Kpoint & kpoint,int iSpin,const ColumnBundle & Omega_result) const253 void WannierMinimizer::axpyWfns_grad(double alpha, matrix& Omega_A, const WannierMinimizer::Kpoint& kpoint, int iSpin, const ColumnBundle& Omega_result) const
254 { static StopWatch watch("WannierMinimizer::axpyWfns_grad"); watch.start();
255 axpyWfns_COMMON(Omega_result)
256 //Gather from common basis to reduced basis (=> conjugate transformations):
257 ColumnBundle Omega_C = C->similar(Omega_result.nCols());
258 Omega_C.zero();
259 transform.gatherAxpy(alpha, Omega_result,0,1, Omega_C);
260 //Propagate gradient to rotation matrix:
261 matrix Omega_Astar = Omega_C ^ *C;
262 Omega_A += (kpoint.invert<0 ? conj(Omega_Astar) : Omega_Astar);
263 watch.stop();
264 }
265
266 #undef axpyWfns_COMMON
267
268 //Gaussian orbital of specified width and angular momentum
gaussTilde(double G,double sigma,int l,double normPrefac)269 inline double gaussTilde(double G, double sigma, int l, double normPrefac)
270 { double Gsigma = G*sigma;
271 return normPrefac * std::pow(Gsigma,l) * exp(-0.5*Gsigma*Gsigma);
272 }
273
trialWfns(const WannierMinimizer::Kpoint & kpoint) const274 ColumnBundle WannierMinimizer::trialWfns(const WannierMinimizer::Kpoint& kpoint) const
275 { ColumnBundle ret(nCenters-nFrozen, basis.nbasis*nSpinor, &basis, &kpoint, isGpuEnabled());
276 ColumnBundle temp = ret.similar(1); //single column for intermediate computations
277 //Generate atomic orbitals if necessary:
278 std::vector<ColumnBundle> psiAtomic;
279 if(wannier.needAtomicOrbitals)
280 { psiAtomic.resize(e.iInfo.species.size());
281 for(unsigned sp=0; sp<e.iInfo.species.size(); sp++)
282 { psiAtomic[sp].init(e.iInfo.species[sp]->nAtomicOrbitals(), basis.nbasis*nSpinor, &basis, &kpoint, isGpuEnabled());
283 e.iInfo.species[sp]->setAtomicOrbitals(psiAtomic[sp], false);
284 }
285 }
286 ret.zero();
287 complex* retData = ret.dataPref();
288 for(const Wannier::TrialOrbital& t: wannier.trialOrbitals)
289 { for(const Wannier::AtomicOrbital& ao: t)
290 { //Handle numerical orbitals:
291 if(ao.numericalOrbIndex >= 0)
292 { const ColumnBundle& Cnum = *(numericalOrbitals.find(kpoint)->second);
293 //Apply offset to selected column:
294 assert(ao.numericalOrbIndex < Cnum.nCols());
295 temp = translate(Cnum.getSub(ao.numericalOrbIndex,ao.numericalOrbIndex+1), ao.r);
296 //Accumulate to result
297 callPref(eblas_zaxpy)(ret.colLength(), ao.coeff, temp.dataPref(),1, retData,1);
298 continue;
299 }
300 //Handle atomic orbitals:
301 const DOS::Weight::OrbitalDesc& od = ao.orbitalDesc;
302 complex lPhase = cis(0.5*M_PI*od.l); //including this phase ensures odd l projectors are real (i^l term in spherical wave expansion)
303 if(ao.sp >= 0)
304 { int iCol = e.iInfo.species[ao.sp]->atomicOrbitalOffset(ao.atom, od.n, od.l, od.m, od.s);
305 callPref(eblas_zaxpy)(ret.colLength(), ao.coeff*lPhase, psiAtomic[ao.sp].dataPref()+iCol*ret.colLength(),1, retData,1);
306 continue;
307 }
308 //Gaussian orbital:
309 if(ao.sigma > 0.)
310 { //--- Copy the center to managed memory:
311 ManagedArray<vector3<>> pos(&ao.r, 1);
312 //--- Get / create the radial part:
313 RadialFunctionG hRadial;
314 double Al = 0.25*sqrt(M_PI);
315 for(int p=1; p<=od.l; p++)
316 Al *= (p+0.5);
317 double sigma = (od.n+1) * ao.sigma;
318 double normPrefac = sqrt(std::pow(2*M_PI*sigma,3) / Al);
319 hRadial.init(od.l, 0.02, e.gInfo.GmaxSphere, gaussTilde, sigma, od.l, normPrefac);
320 //--- Initialize the projector:
321 assert(od.s < nSpinor);
322 if(nSpinor > 1) { temp.zero(); assert(od.spinType==SpinZ); }
323 callPref(Vnl)(basis.nbasis, basis.nbasis, 1, od.l, od.m, kpoint.k, basis.iGarr.dataPref(), e.gInfo.G, pos.dataPref(), hRadial, temp.dataPref()+od.s*basis.nbasis);
324 hRadial.free();
325 //--- Accumulate to trial orbital:
326 callPref(eblas_zaxpy)(ret.colLength(), ao.coeff*lPhase/e.gInfo.detR, temp.dataPref(),1, retData,1);
327 continue;
328 }
329 assert(!"Orbital was neither Numerical, Gaussian nor Atomic.");
330 }
331 retData += ret.colLength();
332 }
333 return ret;
334 }
335
overlap(const ColumnBundle & C1,const ColumnBundle & C2,const std::vector<matrix> * VdagC1ptr,const std::vector<matrix> * VdagC2ptr) const336 matrix WannierMinimizer::overlap(const ColumnBundle& C1, const ColumnBundle& C2, const std::vector<matrix>* VdagC1ptr, const std::vector<matrix>* VdagC2ptr) const
337 { static StopWatch watch("WannierMinimizer::overlap"); watch.start();
338 const GridInfo& gInfo = *(C1.basis->gInfo);
339 const IonInfo& iInfo = *(C1.basis->iInfo);
340 matrix ret = gInfo.detR * (C1 ^ C2);
341 //k-point difference:
342 vector3<> dkVec = C2.qnum->k - C1.qnum->k;
343 double dk = sqrt(gInfo.GGT.metric_length_squared(dkVec));
344 vector3<> dkHat = gInfo.GT * dkVec * (dk ? 1.0/dk : 0.0); //the unit Vector along dkVec (set dkHat to 0 for dk=0 (doesn't matter))
345 //Augment at each species:
346 for(size_t iSp=0; iSp<iInfo.species.size(); iSp++)
347 { const SpeciesInfo& sp = *(iInfo.species[iSp]);
348 if(!sp.isUltrasoft()) continue; //no augmentation
349 //Create the Q matrix appropriate for current k-point difference:
350 matrix Qk = zeroes(sp.QintAll.nRows(), sp.QintAll.nCols());
351 complex* QkData = Qk.data();
352 int i1 = 0;
353 for(int l1=0; l1<int(sp.VnlRadial.size()); l1++)
354 for(int p1=0; p1<int(sp.VnlRadial[l1].size()); p1++)
355 for(int m1=-l1; m1<=l1; m1++)
356 { //Triple loop over second projector:
357 int i2 = 0;
358 for(int l2=0; l2<int(sp.VnlRadial.size()); l2++)
359 for(int p2=0; p2<int(sp.VnlRadial[l2].size()); p2++)
360 for(int m2=-l2; m2<=l2; m2++)
361 { std::vector<YlmProdTerm> terms = expandYlmProd(l1,m1, l2,m2);
362 complex q12 = 0.;
363 for(const YlmProdTerm& term: terms)
364 { SpeciesInfo::QijIndex qIndex = { l1, p1, l2, p2, term.l };
365 auto Qijl = sp.Qradial.find(qIndex);
366 if(Qijl==sp.Qradial.end()) continue; //no entry at this l
367 q12 += term.coeff * cis(0.5*M_PI*(l2-l1-term.l)) * Ylm(term.l,term.m, dkHat) * Qijl->second(dk);
368 }
369 for(int s=0; s<nSpinor; s++)
370 QkData[Qk.index(i1+s,i2+s)] = q12;
371 i2 += nSpinor;
372 }
373 i1 += nSpinor;
374 }
375 if(sp.isRelativistic()) Qk = sp.fljAll * Qk * sp.fljAll;
376 //Phases for each atom:
377 std::vector<complex> phaseArr;
378 for(vector3<> x: sp.atpos)
379 phaseArr.push_back(cis(-2*M_PI*dot(dkVec,x)));
380 //Augment the overlap
381 matrix VdagC1 = VdagC1ptr ? VdagC1ptr->at(iSp) : (*sp.getV(C1)) ^ C1;
382 matrix VdagC2 = VdagC2ptr ? VdagC2ptr->at(iSp) : (*sp.getV(C2)) ^ C2;
383 ret += dagger(VdagC1) * (tiledBlockMatrix(Qk, sp.atpos.size(), &phaseArr) * VdagC2);
384 }
385 watch.stop();
386 return ret;
387 }
388
dumpWannierized(const matrix & Htilde,const matrix & phase,string varName,bool realPartOnly,int iSpin) const389 void WannierMinimizer::dumpWannierized(const matrix& Htilde, const matrix& phase, string varName, bool realPartOnly, int iSpin) const
390 {
391 string fname = wannier.getFilename(Wannier::FilenameDump, varName, &iSpin);
392 logPrintf("Dumping '%s' ... ", fname.c_str()); logFlush();
393 FILE* fp = 0;
394 if(mpiWorld->isHead())
395 { fp = fopen(fname.c_str(), "w");
396 if(!fp) die_alone("could not open file for writing.\n");
397 }
398 //Determine block size:
399 int nCells = phase.nCols();
400 int blockSize = ceildiv(nCells, mpiWorld->nProcesses()); //so that memory before and after FT roughly similar
401 int nBlocks = ceildiv(nCells, blockSize);
402 //Loop over blocks:
403 int iCellStart = 0;
404 double nrm2totSq = 0., nrm2imSq = 0.;
405 for(int iBlock=0; iBlock<nBlocks; iBlock++)
406 { int iCellStop = std::min(iCellStart+blockSize, nCells);
407 matrix Hblock = Htilde * phase(0,phase.nRows(), iCellStart,iCellStop);
408 mpiWorld->reduceData(Hblock, MPIUtil::ReduceSum);
409 if(mpiWorld->isHead())
410 { //Write to file:
411 if(realPartOnly)
412 { nrm2totSq += std::pow(nrm2(Hblock), 2);
413 nrm2imSq += std::pow(callPref(eblas_dnrm2)(Hblock.nData(), ((double*)Hblock.dataPref())+1, 2), 2); //imaginary parts with a stride of 2
414 Hblock.write_real(fp);
415 }
416 else Hblock.write(fp);
417 }
418 iCellStart = iCellStop;
419 }
420 if(mpiWorld->isHead()) fclose(fp);
421 if(realPartOnly)
422 { mpiWorld->bcast(nrm2totSq);
423 mpiWorld->bcast(nrm2imSq);
424 logPrintf("done. Relative discarded imaginary part: %le\n", sqrt(nrm2imSq / nrm2totSq));
425 }
426 else
427 logPrintf("done.\n");
428 }
429