1 /*
2   free space evals (k=6,tol=1e-4) from moldft
3   -3.0306e+01 -1.3228e+00 -4.9800e-01 -4.9800e-01 -4.9800e-01
4 
5   computed by testperiodic with gamma point L=30.0
6   -3.0304e+01 -1.3213e+00 -4.9782e-01 -4.9782e-01 -4.9782e-01
7 
8  */
9 
10 
11 //#define WORLD_INSTANTIATE_STATIC_TEMPLATES
12 #include <madness/mra/mra.h>
13 #include <madness/tensor/solvers.h>
14 using namespace madness;
15 
16 #include <chem/molecule.h>
17 #include <chem/molecularbasis.h>
18 #include <chem/xcfunctional.h>
19 
20 #include "subspace.h"
21 
22 static const double_complex I(0,1);
23 static const double twopi = 2.0*constants::pi;
24 
25 //static const double L = 5.0; // Unit cell size in au for neon
26 //static const double L = 8.37; // Unit cell size in au for neon
27 //static const double L = 7.65; // Unit cell size in au for LiF
28 //static const double L = 3.8; // Unit cell size in au for LiF
29 //static const double L = 8.0;
30 //static const double L = 10.26085381075144364474; // Unit cell size in au for Si
31 // static const double L = 10.3235; // Unit cell size in au for CaF2
32 static const double L = 10.6591; // Unit cell size in au for NaCl
33 
34 static const int maxR = 3; // periodic sums from -R to +R inclusive
35 static const double thresh = 1e-6;
36 static const double kwavelet = 20;
37 static const int truncate_mode = 0;
38 
39 static Molecule molecule;
40 static AtomicBasisSet aobasis;
41 static Subspace* subspace;
42 
43 typedef SeparatedConvolution<double,3> operatorT;
44 typedef SeparatedConvolution<double_complex,3> coperatorT;
45 typedef std::shared_ptr<operatorT> poperatorT;
46 typedef std::shared_ptr<coperatorT> pcoperatorT;
47 typedef std::pair<vector_complex_function_3d,vector_complex_function_3d> vcpairT;
48 
49 static double ttt, sss;
START_TIMER(World & world)50 static void START_TIMER(World& world) {
51     world.gop.fence(); ttt=wall_time(); sss=cpu_time();
52 }
53 
END_TIMER(World & world,const char * msg)54 static void END_TIMER(World& world, const char* msg) {
55     ttt=wall_time()-ttt; sss=cpu_time()-sss; if (world.rank()==0) printf("timer: %20.20s %8.2fs %8.2fs\n", msg, sss, ttt);
56 }
57 
58 class SplitterFunctor: public FunctionFunctorInterface<double,3> {
59 public:
operator ()(const coord_3d & r) const60     double operator()(const coord_3d& r) const {
61         return 2.0*r[2]*r[2] + r[0]*r[0];
62     }
63 };
64 
65 template <typename Q>
66 class ExpFunctor: public FunctionFunctorInterface<Q,3> {
67 private:
68     Q qx;
69     Q qy;
70     Q qz;
71 public:
ExpFunctor(Q qx,Q qy,Q qz)72     ExpFunctor(Q qx, Q qy, Q qz) : qx(qx), qy(qy), qz(qz) {}
operator ()(const coord_3d & x) const73     Q operator()(const coord_3d& x) const {
74       return std::exp(qx*x[0] + qy*x[1] + qz*x[2]);
75     }
76 };
77 
78 template <typename Q>
79 class ExpFunctor3d: public FunctionFunctorInterface<Q,3> {
80 private:
81     Q q0;
82     Q q1;
83     Q q2;
84 public:
ExpFunctor3d(Q q0,Q q1,Q q2)85     ExpFunctor3d(Q q0, Q q1, Q q2) : q0(q0), q1(q1), q2(q2) {}
operator ()(const coord_3d & x) const86     Q operator()(const coord_3d& x) const {
87       return std::exp(q0*x[0])*std::exp(q1*x[1])*std::exp(q2*x[2]);
88     }
89 };
90 
91 class MolecularGuessDensityFunctor : public FunctionFunctorInterface<double,3> {
92 private:
93     const Molecule& molecule;
94     const AtomicBasisSet& aobasis;
95     const int maxR = 2;
96 public:
MolecularGuessDensityFunctor(const Molecule & molecule,const AtomicBasisSet & aobasis)97     MolecularGuessDensityFunctor(const Molecule& molecule, const AtomicBasisSet& aobasis)
98         : molecule(molecule), aobasis(aobasis) {}
99 
operator ()(const coord_3d & x) const100     double operator()(const coord_3d& x) const {
101         double sum = 0.0;
102         for (int i=-maxR; i<=+maxR; i++) {
103             for (int j=-maxR; j<=+maxR; j++) {
104                 for (int k=-maxR; k<=+maxR; k++) {
105                     sum += aobasis.eval_guess_density(molecule, x[0]+i*L, x[1]+j*L, x[2]+k*L);
106                 }
107             }
108         }
109         return sum;
110     }
111 };
112 
113 class AtomicBasisFunctor : public FunctionFunctorInterface<double_complex,3> {
114 private:
115     const AtomicBasisFunction aofunc;
116     double kx, ky, kz;
117     std::vector<coord_3d> specialpt;
118 public:
AtomicBasisFunctor(const AtomicBasisFunction & aofunc,double kx,double ky,double kz)119     AtomicBasisFunctor(const AtomicBasisFunction& aofunc, double kx, double ky, double kz)
120         : aofunc(aofunc), kx(kx), ky(ky), kz(kz)
121     {
122     	double x, y, z;
123         aofunc.get_coords(x,y,z);
124         coord_3d r;
125         r[0]=x; r[1]=y; r[2]=z;
126         specialpt=std::vector<coord_3d>(1,r);
127     }
128 
operator ()(const coord_3d & x) const129     double_complex operator()(const coord_3d& x) const {
130         double_complex sum = 0.0;
131         for (int i=-maxR; i<=+maxR; i++) {
132             double xx = x[0]+i*L;
133             double rsq = xx*xx;
134             if (rsq < aofunc.rangesq()) {
135                 for (int j=-maxR; j<=+maxR; j++) {
136                     double yy = x[1]+j*L;
137                     rsq += yy*yy;
138                     if (rsq < aofunc.rangesq()) {
139                         for (int k=-maxR; k<=+maxR; k++) {
140                             double zz = x[2]+k*L;
141                             rsq += zz*zz;
142                             if (rsq < aofunc.rangesq()) {
143                                 sum += exp(-I*(kx*xx+ky*yy+kz*zz))*aofunc(xx, yy, zz);
144                             }
145                         }
146                     }
147                 }
148             }
149         }
150         return sum;
151     }
152 
special_points() const153     std::vector<coord_3d> special_points() const {return specialpt;}
154 };
155 
156 class AtomicOrbitalFunctor: public FunctionFunctorInterface<double,3> {
157 private:
158     const AtomicBasisFunction aofunc;
159     double R;
160     int offx, offy, offz;
161     std::vector<coord_3d> specialpt;
162     double rangesq;
163 public:
AtomicOrbitalFunctor(const AtomicBasisFunction & aofunc,double R,int offx,int offy,int offz)164     AtomicOrbitalFunctor(const AtomicBasisFunction& aofunc, double R, int offx, int offy, int offz)
165         : aofunc(aofunc), R(R), offx(offx), offy(offy), offz(offz)
166     {
167         double x, y, z;
168         aofunc.get_coords(x,y,z);
169         coord_3d r;
170         r[0]=x+offx*R; r[1]=y+offy*R; r[2]=z+offz*R;
171         specialpt=std::vector<coord_3d>(1,r);
172         rangesq = 3.0*aofunc.rangesq();
173     }
174 
operator ()(const coord_3d & x) const175     double operator()(const coord_3d& x) const {
176         return aofunc(x[0] + offx*R, x[1] + offy*R, x[2] + offz*R);
177     }
178 
special_points() const179     std::vector<coord_3d> special_points() const {return specialpt;}
180 };
181 
182 class NuclearDensityFunctor : public FunctionFunctorInterface<double,3> {
183 private:
184     const Molecule& molecule;
185     const double R;
186     std::vector<coord_3d> specialpt;
187     const int maxR = 1;
188 public:
NuclearDensityFunctor(const Molecule & molecule,double R)189     NuclearDensityFunctor(const Molecule& molecule, double R)
190         : molecule(molecule), R(R), specialpt(molecule.get_all_coords_vec())
191     {}
192 
operator ()(const coord_3d & x) const193     double operator()(const coord_3d& x) const {
194         double big = 2*R + 6.0*molecule.smallest_length_scale();
195         double sum = 0.0;
196         for (int i=-maxR; i<=+maxR; i++) {
197             double xx = x[0]+i*R;
198             if (xx < big && xx > -big) {
199                 for (int j=-maxR; j<=+maxR; j++) {
200                     double yy = x[1]+j*R;
201                     if (yy < big && yy > -big) {
202                         for (int k=-maxR; k<=+maxR; k++) {
203                             double zz = x[2]+k*R;
204                             if (zz < big && zz > -big)
205                                 sum += molecule.nuclear_charge_density(x[0]+i*R, x[1]+j*R, x[2]+k*R);
206                         }
207                     }
208                 }
209             }
210         }
211         return sum;
212     }
213 
special_points() const214     std::vector<coord_3d> special_points() const {return specialpt;}
215 
special_level()216     Level special_level() {
217         return 10;
218     }
219 
220 };
221 
222 class KPeriodicBSHOperator {
223 private:
224     double kx, ky, kz;
225     double L;
226     complex_function_3d phase_p;
227     complex_function_3d phase_m;
228 
229 public:
KPeriodicBSHOperator(World & world,const double & kx,const double & ky,const double & kz,const double & L)230     KPeriodicBSHOperator(World& world, const double& kx, const double& ky, const double& kz, const double& L)
231      : kx(kx), ky(ky), kz(kz), L(L) {
232         phase_p = complex_factory_3d(world).functor(complex_functor_3d(
233           new ExpFunctor3d<double_complex>(I*kx,I*ky,I*kz))).truncate_mode(0).truncate_on_project();
234         phase_m = complex_factory_3d(world).functor(complex_functor_3d(
235           new ExpFunctor3d<double_complex>(-I*kx,-I*ky,-I*kz))).truncate_mode(0).truncate_on_project();
236     }
237 
KPeriodicBSHOperator(World & world,const Vector<double,3> & kpt,const double & L)238     KPeriodicBSHOperator(World& world, const Vector<double,3>& kpt, const double& L)
239      : kx(kpt[0]), ky(kpt[1]), kz(kpt[2]), L(L) {
240         phase_p = complex_factory_3d(world).functor(complex_functor_3d(
241           new ExpFunctor3d<double_complex>(I*kx,I*ky,I*kz))).truncate_mode(0).truncate_on_project();
242         phase_m = complex_factory_3d(world).functor(complex_functor_3d(
243           new ExpFunctor3d<double_complex>(-I*kx,-I*ky,-I*kz))).truncate_mode(0).truncate_on_project();
244     }
245 
apply(World & world,const vector_complex_function_3d & v,const tensor_real & evals,double shift=0.0)246     vector_complex_function_3d apply(World& world, const vector_complex_function_3d& v, const tensor_real& evals, double shift = 0.0) {
247         START_TIMER(world);
248         int nmo = evals.dim(0);
249         vector<pcoperatorT> ops(nmo);
250         for(int i = 0;i < nmo; ++i){
251             double eps = evals(i) + shift;
252             ops[i] = pcoperatorT(PeriodicBSHOperatorPtr3D(world, vec(-kx*L, -ky*L, -kz*L), sqrt(-2.0*eps),  1e-4, FunctionDefaults<3>::get_thresh()));
253         }
254         vector_complex_function_3d t1 = mul(world,phase_p,v);
255         truncate(world,t1);
256         vector_complex_function_3d t2 = ::apply(world,ops,t1);
257         vector_complex_function_3d t3 = mul(world,phase_m,t2);
258         return t3;
259         END_TIMER(world, "apply periodic bsh");
260     }
261 };
262 
makeao_slow(World & world,const std::vector<Vector<double,3>> & kpoints)263 vector_complex_function_3d makeao_slow(World& world, const std::vector<Vector<double,3> >& kpoints) {
264     int nkpt = kpoints.size();
265     int nbf = aobasis.nbf(molecule);
266     vector_complex_function_3d ao(nkpt*nbf);
267     for (int ik = 0; ik < nkpt; ++ik) {
268         Vector<double,3> kvec = kpoints[ik];
269         double kx = kvec[0]; double ky = kvec[1]; double kz = kvec[2];
270         for(int i = 0; i<aobasis.nbf(molecule); ++i) {
271             aobasis.get_atomic_basis_function(molecule, i).print_me(std::cout);
272             complex_functor_3d aofunc(
273                new AtomicBasisFunctor(
274                aobasis.get_atomic_basis_function(molecule, i), kx, ky, kz));
275             ao[ik*nbf+i] = complex_factory_3d(world).functor(aofunc).truncate_on_project().nofence().truncate_mode(0);
276         }
277     }
278     world.gop.fence();
279     normalize(world,ao);
280     truncate(world,ao);
281     return ao;
282 }
283 
makeao(World & world,const std::vector<Vector<double,3>> & kpts,double R)284 vector_complex_function_3d makeao(World& world, const std::vector<Vector<double,3> >& kpts, double R) {
285     START_TIMER(world);
286     int nkpt = kpts.size();
287     int nbf = aobasis.nbf(molecule);
288     double t1 = 1./std::sqrt((double)nbf);
289     vector_complex_function_3d aos = zero_functions_compressed<double_complex,3>(world, nkpt*nbf);
290     if (world.rank() == 0) print("nbf:  ", nbf);
291     for (int ibf = 0; ibf < nbf; ibf++) {
292         if (world.rank() == 0) print("\n\nibf: ", ibf);
293         for (int i = -maxR; i <= maxR; i++) {
294             for (int j = -maxR; j <= maxR; j++) {
295                 for (int k = -maxR; k <= maxR; k++) {
296                     AtomicBasisFunction abf = aobasis.get_atomic_basis_function(molecule, ibf);
297                     //if ((i*i+j*j+k*k)*L*L < 3.0*abf.rangesq()) {
298                     if (true) {
299                         real_functor_3d aofunc(
300                            new AtomicOrbitalFunctor(
301                            aobasis.get_atomic_basis_function(molecule, ibf), L, i, j, k));
302                         real_function_3d ao = real_factory_3d(world).functor(aofunc).truncate_on_project().truncate_mode(0);
303                         ao.compress();
304                         for (int ik = 0; ik < nkpt; ik++) {
305                             Vector<double,3> kpt = kpts[ik];
306                             complex_function_3d t2 = t1*std::exp(double_complex(0.0,(i*kpt[0]+j*kpt[1]+k*kpt[2])*R))*ao;
307                             aos[ik*nbf+ibf].gaxpy(1.0,t2,1.0);
308                         }
309                     }
310                 }
311             }
312         }
313     }
314     for (int ik = 0; ik < nkpt; ik++) {
315         Vector<double,3> kpt = kpts[ik];
316         complex_function_3d phase_m = complex_factory_3d(world).functor(complex_functor_3d(
317           new ExpFunctor3d<double_complex>(-I*kpt[0],-I*kpt[1],-I*kpt[2]))).truncate_mode(0).truncate_on_project();
318         for (int ibf = 0; ibf < nbf; ibf++) {
319             aos[ik*nbf+ibf] = aos[ik*nbf+ibf]*phase_m;
320         }
321     }
322     END_TIMER(world, "makeao");
323     return aos;
324 }
325 
326 //vector_complex_function_3d makeao(World& world, const std::vector<Vector<double,3> >& kpts, double R) {
327 //    START_TIMER(world);
328 //    int nkpt = kpts.size();
329 //    int nbf = aobasis.nbf(molecule);
330 //    double t1 = 1./std::sqrt((double)nbf);
331 //    vector_complex_function_3d ao = zero_functions_compressed<double_complex,3>(world, std:pow(2*maxR+1, 3)*nbf);
332 //    vector_complex_function_3d aos = zero_functions_compressed<double_complex,3>(world, nkpt*nbf);
333 //    print("nbf:  ", nbf);
334 //    for (int ibf = 0; ibf < nbf; ibf++) {
335 //        print("\n\nibf: ", ibf);
336 //        for (int i = -maxR; i <= maxR; i++) {
337 //            for (int j = -maxR; j <= maxR; j++) {
338 //                for (int k = -maxR; k <= maxR; k++) {
339 //                    AtomicBasisFunction abf = aobasis.get_atomic_basis_function(molecule, ibf);
340 //                    //if ((i*i+j*j+k*k)*L*L < 3.0*abf.rangesq()) {
341 //                    if (true) {
342 //                        real_functor_3d aofunc(
343 //                           new AtomicOrbitalFunctor(
344 //                           aobasis.get_atomic_basis_function(molecule, ibf), L, i, j, k));
345 //                        ao[i = real_factory_3d(world).functor(aofunc).truncate_on_project().truncate_mode(0);
346 //                    }
347 //                }
348 //            }
349 //        }
350 //    }
351 //    for (int ik = 0; ik < nkpt; ik++) {
352 //        Vector<double,3> kpt = kpts[ik];
353 //        complex_function_3d t2 = t1*std::exp(double_complex(0.0,(i*kpt[0]+j*kpt[1]+k*kpt[2])*R))*ao;
354 //        aos[ik*nbf+ibf].gaxpy(1.0,t2,1.0);
355 //    }
356 //    for (int ik = 0; ik < nkpt; ik++) {
357 //        Vector<double,3> kpt = kpts[ik];
358 //        complex_function_3d phase_m = complex_factory_3d(world).functor(complex_functor_3d(
359 //          new ExpFunctor3d<double_complex>(-I*kpt[0],-I*kpt[1],-I*kpt[2]))).truncate_mode(0).truncate_on_project();
360 //        for (int ibf = 0; ibf < nbf; ibf++) {
361 //            aos[ik*nbf+ibf] = aos[ik*nbf+ibf]*phase_m;
362 //        }
363 //    }
364 //    END_TIMER(world, "makeao");
365 //    return aos;
366 //}
367 
make_kinetic_matrix(World & world,const vector_complex_function_3d & v,const Vector<double,3> & kpt)368 tensor_complex make_kinetic_matrix(World& world, const vector_complex_function_3d& v, const Vector<double,3>& kpt) {
369     START_TIMER(world);
370     double kx = kpt[0]; double ky = kpt[1]; double kz = kpt[2];
371     complex_derivative_3d Dx(world, 0);
372     complex_derivative_3d Dy(world, 1);
373     complex_derivative_3d Dz(world, 2);
374 
375     auto dvx = apply(world, Dx, v);
376     auto dvy = apply(world, Dy, v);
377     auto dvz = apply(world, Dz, v);
378 
379     // -1/2 (del + ik)^2 = -1/2 del^2 - i k.del + 1/2 k^2
380     // -1/2 <p|del^2|q> = +1/2 <del p | del q>
381 
382     auto f1 = 0.5 * (matrix_inner(world, dvx, dvx, false) +
383                      matrix_inner(world, dvy, dvy, false) +
384                      matrix_inner(world, dvz, dvz, false));
385 
386     auto f2 =
387         (-I*kx)*matrix_inner(world, v, dvx, false) +
388         (-I*ky)*matrix_inner(world, v, dvy, false) +
389         (-I*kz)*matrix_inner(world, v, dvz, false);
390 
391     auto f3 = (0.5 * (kx*kx + ky*ky + kz*kz)) * matrix_inner(world, v, v, true);
392     END_TIMER(world, "kinetic energy");
393 
394     return f1 + f2 + f3;
395 }
396 
apply_potential(World & world,const real_function_3d & potential,const vector_complex_function_3d & psi)397 vector_complex_function_3d apply_potential(World& world, const real_function_3d& potential, const vector_complex_function_3d& psi)
398 {
399     START_TIMER(world);
400     auto vpsi = mul(world, potential, psi, false);
401     world.gop.fence();
402     END_TIMER(world, "apply potential");
403     return vpsi;
404 }
405 
orth(World & world,const vector_complex_function_3d & v,double thresh=1.e-10)406 vector_complex_function_3d orth(World& world, const vector_complex_function_3d& v, double thresh = 1.e-10) {
407     auto vsize = v.size();
408     auto ov_mat = matrix_inner(world, v, v, true);
409     for (unsigned int i=0; i<v.size(); i++) {
410         for (unsigned int j=0; j<i; j++) {
411             if (std::abs(ov_mat(i,j)) < thresh*1e-1) {
412                 ov_mat(i,j) = ov_mat(j,i) = 0.0;
413             }
414         }
415     }
416     tensor_complex U;
417     tensor_real D;
418     syev(ov_mat, U, D);
419     print("D: ");
420     print(D);
421     auto indx = -1;
422     for (unsigned int i = 0; i < vsize && indx < 0; i++) {
423       if (std::abs(D(i)) > thresh) {
424         indx = i;
425       }
426     }
427     U = copy(U(_,Slice(indx,vsize-1)));
428     auto R = transform(world, v, U);
429     normalize(world, R, true);
430     ov_mat = matrix_inner(world, R, R, true);
431     print("new overlap: ");
432     print(ov_mat);
433     return R;
434 }
435 
diag_and_transform(World & world,const Vector<double,3> kpt,const real_function_3d & v,const vector_complex_function_3d & psik,int nmo=0)436 std::pair<vector_complex_function_3d, tensor_real> diag_and_transform(World& world,
437                                                                       const Vector<double,3> kpt,
438                                                                       const real_function_3d& v,
439                                                                       const vector_complex_function_3d& psik,
440                                                                       int nmo = 0) {
441     auto vpsik = apply_potential(world, v, psik);
442     auto ke_mat = make_kinetic_matrix(world, psik, kpt);
443     auto pe_mat = matrix_inner(world, psik, vpsik, true);
444     auto ov_mat = matrix_inner(world, psik, psik, true);
445 
446     tensor_complex fock = ke_mat + pe_mat;
447     // eliminate small off-diagonal elements and lift diagonal
448     // degeneracies to reduce random mixing
449     for (unsigned int i=0; i<psik.size(); i++) {
450         fock(i,i) += i*thresh*1e-2;
451         for (unsigned int j=0; j<i; j++) {
452             if (std::abs(fock(i,j)) < thresh*1e-1 || std::abs(ov_mat(i,j)) < thresh*1e-1) {
453                 fock(i,j) = fock(j,i) = 0.0;
454                 ov_mat(i,j) = ov_mat(j,i) = 0.0;
455             }
456         }
457     }
458 
459     // print("H:\n"); print(fock);
460     // print("S:\n"); print(ov_mat);
461     tensor_complex c;
462     tensor_real e;
463     sygv(fock, ov_mat, 1, c, e);
464 
465     if (nmo > 0) {
466         c = copy(c(_,Slice(0,nmo-1))); // truncate to occupied states
467         e = e(Slice(0,nmo-1));
468     }
469 
470     auto new_psik = transform(world, psik, c);
471     return std::pair<vector_complex_function_3d, tensor_real>(new_psik, e);
472 }
473 
make_lda_potential(World & world,const real_function_3d & rho)474 real_function_3d make_lda_potential(World& world, const real_function_3d &rho)
475 {
476     START_TIMER(world);
477     auto vlda = copy(rho);
478     vlda.reconstruct();
479     vlda.unaryop(xc_lda_potential());
480     END_TIMER(world, "lda potential");
481     return vlda;
482 }
483 
make_coulomb_potential(World & world,const real_function_3d & rho)484 real_function_3d make_coulomb_potential(World& world, const real_function_3d& rho)
485 {
486     START_TIMER(world);
487     real_convolution_3d op = CoulombOperator(world, 1e-4, thresh);
488     END_TIMER(world, "hartree potential");
489     return op(rho);
490 }
491 
make_bsh_operators(World & world,const tensor_real & evals,double shift)492 vector<poperatorT> make_bsh_operators(World & world, const tensor_real& evals, double shift)
493 {
494     int nmo = evals.dim(0);
495     vector<poperatorT> ops(nmo);
496     for(int i = 0;i < nmo; ++i){
497         double eps = evals(i) + shift;
498         ops[i] = poperatorT(BSHOperatorPtr3D(world, sqrt(-2.0 * eps),  1e-4, thresh));
499     }
500     return ops;
501 }
502 
orthogonalize(World & world,vector_complex_function_3d & psi)503 void orthogonalize(World& world, vector_complex_function_3d& psi) {
504     START_TIMER(world);
505     compress(world, psi);
506     for (unsigned int i = 0; i<psi.size(); i++) {
507         complex_function_3d& psi_i = psi[i];
508         psi_i.scale(1.0/psi_i.norm2());
509         for (unsigned int j = 0; j<i; j++) {
510             complex_function_3d& psi_j = psi[j];
511             double_complex s = inner(psi_j,psi_i);
512             psi_i.gaxpy(1.0,psi_j,-s); // |i> = |i> - |j><j|i>
513             psi_i.scale(1.0/psi_i.norm2());
514         }
515     }
516     END_TIMER(world, "orthogonalize");
517 }
518 
519 // function to apply BSH with twisted PBC
520 // kx, ky, kz -- some k value in the 1BZ (e.g. 0.5*2.0*pi/L where L is the lattice constant)
521 // energy     -- bound state energy (should be negative)
522 // L          -- lattice constant
523 //
524 // Obviously this is slow
apply_periodic_bsh(World & world,const complex_function_3d & f,const double & kx,const double & ky,const double & kz,const double & energy,const double & L)525 complex_function_3d apply_periodic_bsh(World& world, const complex_function_3d& f,
526                                        const double& kx, const double& ky, const double& kz,
527                                        const double& energy, const double& L) {
528   complex_function_3d phase_p = complex_factory_3d(world).functor(complex_functor_3d(
529     new ExpFunctor3d<double_complex>(I*kx,I*ky,I*kz))).truncate_mode(0).truncate_on_project();
530   complex_function_3d phase_m = complex_factory_3d(world).functor(complex_functor_3d(
531     new ExpFunctor3d<double_complex>(-I*kx,-I*ky,-I*kz))).truncate_mode(0).truncate_on_project();
532   auto op = PeriodicBSHOperator3D(world, vec(-kx*L, -ky*L, -kz*L), sqrt(-2.0*(energy)),  1e-4, FunctionDefaults<3>::get_thresh());
533   complex_function_3d g = phase_m*apply(op, phase_p*f);
534   return g;
535 }
536 
537 // function to apply BSH with twisted PBC
apply_periodic_bsh(World & world,const complex_function_3d & f,const Vector<double,3> & kpt,const double & energy,const double & L)538 complex_function_3d apply_periodic_bsh(World& world, const complex_function_3d& f,
539                                        const Vector<double,3>& kpt,
540                                        const double& energy,
541                                        const double& L) {
542     return apply_periodic_bsh(world,f,kpt[0],kpt[1],kpt[2],energy,L);
543 }
544 
545 // DESTROYS VPSI
update(World & world,int ik,const vector_complex_function_3d & psi,vector_complex_function_3d & vpsi,const Vector<double,3> & kpt,const real_function_3d & v,const tensor_real & e)546 vcpairT update(World& world,
547                int ik,
548                const vector_complex_function_3d& psi,
549                vector_complex_function_3d& vpsi,
550                const Vector<double,3>& kpt,
551                const real_function_3d& v,
552                const tensor_real& e)
553 {
554     int nmo = psi.size();
555     double kx = kpt[0]; double ky = kpt[1]; double kz = kpt[2];
556 
557     // determine shift to make homo <=-0.1
558     double shift = 0.0;
559     if (e(nmo-1) > -0.1) {
560         shift = -0.1 - e(nmo-1);
561         gaxpy(world, 1.0, vpsi, shift, psi);
562     }
563 
564     // Do the BSH thing
565     scale(world, vpsi, -2.0);
566     //truncate(world, vpsi);
567 
568     //vector_complex_function_3d new_psi(nmo);
569     //for (int iorb = 0; iorb < nmo; iorb++) {
570     //  new_psi[iorb] = apply_periodic_bsh(world, vpsi[iorb], kx, ky, kz, e[iorb]+shift, L);
571     //}
572 
573     KPeriodicBSHOperator kop(world, kx, ky, kz, L);
574     vector_complex_function_3d new_psi = kop.apply(world, vpsi, e, shift);
575     vector_complex_function_3d rm = sub(world, psi, new_psi);
576     truncate(world,rm);
577 
578     if (world.rank() == 0) printf("kpoint:  %10.5f    %10.5f    %10.5f\n",kpt[0],kpt[1],kpt[2]);
579     if (world.rank() == 0) printf("      eigenvalue    residual\n");
580     for (int i=0; i<nmo; i++) {
581         double rnorm = rm[i].norm2();
582         if (world.rank() == 0) printf("%4d  %10.6f  %10.1e\n", i, e[i], rnorm);
583     }
584     return vcpairT(new_psi,rm);
585 }
586 
make_density(World & world,const vector_complex_function_3d & v,double weight)587 real_function_3d make_density(World& world, const vector_complex_function_3d& v, double weight) {
588     START_TIMER(world);
589     real_function_3d rho(world);
590     for (unsigned int i=0; i<v.size(); i++) {
591         rho = rho + weight*abssq(v[i]);
592     }
593     rho.scale(2.0); // total closed-shell density
594     rho.truncate();
595     END_TIMER(world, "make density");
596     return rho;
597 }
598 
599 // Return all orbitals for all kpoints
600 // Modifies the density rho
initial_guess(World & world,const real_function_3d & vnuc,real_function_3d & rho,const std::vector<Vector<double,3>> & kpoints,int nst)601 vector_complex_function_3d initial_guess(World& world, const real_function_3d& vnuc, real_function_3d& rho, const std::vector<Vector<double,3> >& kpoints, int nst) {
602     auto psi0 = makeao(world, kpoints, L);
603     auto v = vnuc + make_coulomb_potential(world,rho) + make_lda_potential(world,rho);
604     auto vpsi = apply_potential(world, v, psi0);
605 
606     int nkpt = kpoints.size();
607     int nsize = psi0.size();
608     int nst_initial = nsize/nkpt;
609     MADNESS_ASSERT(nst <= nst_initial);
610     if (world.rank() == 0) print("nsize: ", nsize);
611     if (world.rank() == 0) print("nst_initial: ", nst_initial);
612     vector_complex_function_3d psi;
613     for (int ik = 0; ik < nkpt; ++ik) {
614         vector_complex_function_3d psik(psi0.begin()+ik*nst_initial, psi0.begin()+(ik+1)*nst_initial);
615         vector_complex_function_3d vpsik(vpsi.begin()+ik*nst_initial, vpsi.begin()+(ik+1)*nst_initial);
616 
617         auto ke_mat = make_kinetic_matrix(world, psik, kpoints[ik]);
618         auto pe_mat = matrix_inner(world, psik, vpsik, true);
619         auto ov_mat = matrix_inner(world, psik, psik, true);
620 
621         auto fock = ke_mat + pe_mat;
622         // eliminate small off-diagonal elements and lift diagonal
623         // degeneracies to reduce random mixing
624         for (unsigned int i=0; i<psik.size(); i++) {
625             fock(i,i) += i*thresh*1e-2;
626             for (unsigned int j=0; j<i; j++) {
627                 if (std::abs(fock(i,j)) < thresh*1e-1 && std::abs(ov_mat(i,j)) < thresh*1e-1) {
628                     fock(i,j) = fock(j,i) = 0.0;
629                     ov_mat(i,j) = ov_mat(j,i) = 0.0;
630                 }
631             }
632         }
633 
634         tensor_complex c;
635         tensor_real e;
636         fock = 0.5*(fock + transpose(fock));
637         ov_mat = 0.5*(ov_mat + transpose(ov_mat));
638 
639         // print("Initial Fock: "); print(real(fock));
640         // print("Initial Overlap: "); print(real(ov_mat));
641         sygv(fock, ov_mat, 1, c, e);
642 
643         if (world.rank() == 0) print("initial_guess() ik = ", ik);
644         if (world.rank() == 0) print(e);
645 
646         psik = transform(world, psik, c);
647         vpsik = transform(world, vpsik, c);
648         for (int ist = 0; ist < nst; ist++) {
649             if (world.rank() == 0) print("pushing back ist = ", ist);
650             psi.push_back(psik[ist]);
651         }
652     }
653 
654     // compute new density
655     double weights = 1.0/(double)kpoints.size();
656     rho = make_density(world,psi,weights);
657     return psi;
658 }
659 
matrix_exponential(const tensor_complex & A)660 tensor_complex matrix_exponential(const tensor_complex& A) {
661     const double tol = 1e-13;
662     MADNESS_ASSERT(A.dim(0) == A.dim(1));
663 
664     // Scale A by a power of 2 until it is "small"
665     double anorm = A.normf();
666     int n = 0;
667     double scale = 1.0;
668     while (anorm*scale > 0.1)
669     {
670         n++;
671         scale *= 0.5;
672     }
673     tensor_complex B = scale*A;    // B = A*2^-n
674 
675     // Compute exp(B) using Taylor series
676     tensor_complex expB = tensor_complex(2, B.dims());
677     for (int i = 0; i < expB.dim(0); i++) expB(i,i) = double_complex(1.0,0.0);
678 
679     int k = 1;
680     tensor_complex term = B;
681     while (term.normf() > tol)
682     {
683         expB += term;
684         term = inner(term,B);
685         k++;
686         term.scale(1.0/k);
687     }
688 
689     // Repeatedly square to recover exp(A)
690     while (n--)
691     {
692         expB = inner(expB,expB);
693     }
694 
695     return expB;
696 }
697 
fixphases(World & world,tensor_real & e,tensor_complex & U)698 void fixphases(World& world, tensor_real& e, tensor_complex& U) {
699     int nmo = U.dim(0);
700     long imax;
701     for (long j = 0; j < nmo; j++)
702     {
703         // Get index of largest value in column
704         U(_,j).absmax(&imax);
705         double_complex ang = arg(U(imax,j));
706         double_complex phase = std::exp(-ang*I);
707         // Loop through the rest of the column and divide by the phase
708         for (long i = 0; i < nmo; i++)
709         {
710             U(i,j) *= phase;
711         }
712     }
713 
714     // Within blocks with the same occupation number attempt to
715     // keep orbitals in the same order (to avoid confusing the
716     // non-linear solver).  Have to run the reordering multiple
717     // times to handle multiple degeneracies.
718     int maxpass = 15;
719     for (int pass = 0; pass < maxpass; pass++)
720     {
721         long j;
722         for (long i = 0; i < nmo; i++)
723         {
724             U(_, i).absmax(&j);
725             if (i != j)
726             {
727               tensor_complex tmp = copy(U(_, i));
728               U(_, i) = U(_, j);
729               U(_, j) = tmp;
730               //swap(e[i], e[j]);
731               double ti = e[i];
732               double tj = e[j];
733               e[i] = tj; e[j] = ti;
734             }
735         }
736     }
737 
738     // Rotations between effectively degenerate states confound
739     // the non-linear equation solver ... undo these rotations
740     long ilo = 0; // first element of cluster
741     while (ilo < nmo-1) {
742         long ihi = ilo;
743         while (fabs(e[ilo]-e[ihi+1]) < thresh*700.0*std::max(fabs(e[ilo]),1.0)) {
744             ihi++;
745             if (ihi == nmo-1) break;
746         }
747         long nclus = ihi - ilo + 1;
748         if (nclus > 1) {
749             if (world.rank() == 0) print("   found cluster", ilo, ihi, e[ilo]);
750             tensor_complex q = copy(U(Slice(ilo,ihi),Slice(ilo,ihi)));
751             //print(q);
752             // Special code just for nclus=2
753             // double c = 0.5*(q(0,0) + q(1,1));
754             // double s = 0.5*(q(0,1) - q(1,0));
755             // double r = sqrt(c*c + s*s);
756             // c /= r;
757             // s /= r;
758             // q(0,0) = q(1,1) = c;
759             // q(0,1) = -s;
760             // q(1,0) = s;
761 
762             // Iteratively construct unitary rotation by
763             // exponentiating the antisymmetric part of the matrix
764             // ... is quadratically convergent so just do 3
765             // iterations
766             tensor_complex rot = matrix_exponential(-0.5*(q - conj_transpose(q)));
767             q = inner(q,rot);
768             tensor_complex rot2 = matrix_exponential(-0.5*(q - conj_transpose(q)));
769             q = inner(q,rot2);
770             tensor_complex rot3 = matrix_exponential(-0.5*(q - conj_transpose(q)));
771             q = inner(rot,inner(rot2,rot3));
772             U(_,Slice(ilo,ihi)) = inner(U(_,Slice(ilo,ihi)),q);
773         }
774         ilo = ihi+1;
775     }
776 
777 }
fixphases(World & world,tensor_real & e,tensor_complex & U,vector_complex_function_3d & psik)778 void fixphases(World& world, tensor_real& e, tensor_complex& U, vector_complex_function_3d& psik) {
779     int nmo = U.dim(0);
780     long imax;
781     for (long j = 0; j < nmo; j++)
782     {
783         // Get index of largest value in column
784         U(_,j).absmax(&imax);
785         double_complex ang = arg(U(imax,j));
786         double_complex phase = std::exp(-ang*I);
787         // Loop through the rest of the column and divide by the phase
788         for (long i = 0; i < nmo; i++)
789         {
790             U(i,j) *= phase;
791         }
792     }
793 
794     //// Within blocks with the same occupation number attempt to
795     //// keep orbitals in the same order (to avoid confusing the
796     //// non-linear solver).  Have to run the reordering multiple
797     //// times to handle multiple degeneracies.
798     //int maxpass = 5;
799     //for (int pass = 0; pass < maxpass; pass++)
800     //{
801     //    long j;
802     //    for (long i = 0; i < nmo; i++)
803     //    {
804     //        U(_, i).absmax(&j);
805     //        if (i != j)
806     //        {
807     //          tensor_complex tmp = copy(U(_, i));
808     //          U(_, i) = U(_, j);
809     //          U(_, j) = tmp;
810     //          //swap(e[i], e[j]);
811     //          double ti = e[i];
812     //          double tj = e[j];
813     //          e[i] = tj; e[j] = ti;
814     //        }
815     //    }
816     //}
817 
818     // Rotations between effectively degenerate states confound
819     // the non-linear equation solver ... undo these rotations
820     long ilo = 0; // first element of cluster
821     while (ilo < nmo-1) {
822         long ihi = ilo;
823         while (fabs(e[ilo]-e[ihi+1]) < thresh*10.0*std::max(fabs(e[ilo]),1.0)) {
824             ihi++;
825             if (ihi == nmo-1) break;
826         }
827         long nclus = ihi - ilo + 1;
828 //        if (nclus > 1) {
829 //            //if (world.rank() == 0) print("   found cluster", ilo, ihi);
830 //            tensor_complex q = copy(U(Slice(ilo,ihi),Slice(ilo,ihi)));
831 //            //print(q);
832 //            // Special code just for nclus=2
833 //            // double c = 0.5*(q(0,0) + q(1,1));
834 //            // double s = 0.5*(q(0,1) - q(1,0));
835 //            // double r = sqrt(c*c + s*s);
836 //            // c /= r;
837 //            // s /= r;
838 //            // q(0,0) = q(1,1) = c;
839 //            // q(0,1) = -s;
840 //            // q(1,0) = s;
841 //
842 //            // Iteratively construct unitary rotation by
843 //            // exponentiating the antisymmetric part of the matrix
844 //            // ... is quadratically convergent so just do 3
845 //            // iterations
846 //            tensor_complex rot = matrix_exponential(-0.5*(q - conj_transpose(q)));
847 //            q = inner(q,rot);
848 //            tensor_complex rot2 = matrix_exponential(-0.5*(q - conj_transpose(q)));
849 //            q = inner(q,rot2);
850 //            tensor_complex rot3 = matrix_exponential(-0.5*(q - conj_transpose(q)));
851 //            q = inner(rot,inner(rot2,rot3));
852 //            U(_,Slice(ilo,ihi)) = inner(U(_,Slice(ilo,ihi)),q);
853 //        }
854         if (nclus > 1) {
855             real_function_3d splitter = real_factory_3d(world).functor(real_functor_3d(
856               new SplitterFunctor())).truncate_mode(0).truncate_on_project();
857             vector_complex_function_3d psik_cluster(psik.begin()+ilo, psik.begin()+ihi+1);
858             vector_complex_function_3d sfuncs = mul(world,splitter,psik_cluster);
859             tensor_complex Mcluster = matrix_inner(world,psik_cluster,sfuncs);
860             tensor_real eigs; tensor_complex uvecs;
861             syev(Mcluster, uvecs, eigs);
862             if (world.rank() == 0) {
863                 print("Found cluster of size: ", nclus, "with energy = ", e[ilo]);
864                 // print("cluster matrix:"); print(Mcluster);
865                 // print("cluster eigs:"); print(eigs);
866                 // print("cluster eigenvectors:"); print(uvecs);
867             }
868             psik_cluster = transform(world, psik_cluster, uvecs);
869             for (int ist = 0; ist < nclus; ist++) {
870                 psik[ilo+ist] = psik_cluster[ist];
871             }
872         }
873         ilo = ihi+1;
874     }
875 
876 }
877 
main(int argc,char ** argv)878 int main(int argc, char** argv) {
879     initialize(argc, argv);
880     World world(SafeMPI::COMM_WORLD);
881     startup(world,argc,argv);
882     std::cout.precision(6);
883     FunctionDefaults<3>::set_thresh(thresh);
884     FunctionDefaults<3>::set_k(kwavelet);
885     FunctionDefaults<3>::set_bc(BoundaryConditions<3>(BC_PERIODIC));
886     FunctionDefaults<3>::set_cubic_cell(0,L);
887     FunctionDefaults<3>::set_truncate_mode(truncate_mode);
888 
889     // kpoint list
890     //int nkpt = 4;
891     //std::vector<Vector<double,3> > kpoints(nkpt);
892     //kpoints[0] = vec(0.0*twopi/L, 0.0*twopi/L, 0.0*twopi/L);
893     //kpoints[1] = vec(0.0*twopi/L, 0.0*twopi/L, 0.5*twopi/L);
894     //kpoints[2] = vec(0.0*twopi/L, 0.5*twopi/L, 0.0*twopi/L);
895     //kpoints[3] = vec(0.0*twopi/L, 0.5*twopi/L, 0.5*twopi/L);
896     //double weight = 1.0/(double)nkpt;
897 
898     int nkpt = 8;
899     std::vector<Vector<double,3> > kpoints(nkpt);
900     kpoints[0] = vec(0.0*twopi/L, 0.0*twopi/L, 0.0*twopi/L);
901     kpoints[1] = vec(0.0*twopi/L, 0.0*twopi/L, 0.5*twopi/L);
902     kpoints[2] = vec(0.0*twopi/L, 0.5*twopi/L, 0.0*twopi/L);
903     kpoints[3] = vec(0.0*twopi/L, 0.5*twopi/L, 0.5*twopi/L);
904     kpoints[4] = vec(0.5*twopi/L, 0.0*twopi/L, 0.0*twopi/L);
905     kpoints[5] = vec(0.5*twopi/L, 0.0*twopi/L, 0.5*twopi/L);
906     kpoints[6] = vec(0.5*twopi/L, 0.5*twopi/L, 0.0*twopi/L);
907     kpoints[7] = vec(0.5*twopi/L, 0.5*twopi/L, 0.5*twopi/L);
908     double weight = 1.0/(double)nkpt;
909 
910     //int nkpt = 2;
911     //std::vector<Vector<double,3> > kpoints(nkpt);
912     //kpoints[0] = vec(0.0*twopi/L, 0.0*twopi/L, 0.0*twopi/L);
913     //kpoints[1] = vec(0.0*twopi/L, 0.0*twopi/L, 0.5*twopi/L);
914     //double weight = 1.0/(double)nkpt;
915 
916     //int nkpt = 1;
917     //std::vector<Vector<double,3> > kpoints(nkpt);
918     ////kpoints[0] = vec(0.0*twopi/L, 0.0*twopi/L, 0.0*twopi/L);
919     //kpoints[0] = vec(0.5*twopi/L, 0.5*twopi/L, 0.5*twopi/L);
920     //double weight = 1.0/(double)nkpt;
921 
922     // initialize subspace
923     subspace = new Subspace[nkpt];
924 
925     // // FCC unit cell for ne
926     //molecule.add_atom(  0,  0,  0, 10.0, 10);
927     // molecule.add_atom(L/2,L/2,  0, 10.0, 10);
928     // molecule.add_atom(L/2,  0,L/2, 10.0, 10);
929     // molecule.add_atom(  0,L/2,L/2, 10.0, 10);
930 
931     // Cubic cell for LiF
932     // molecule.add_atom(  0,  0,  0, 9.0, 9);
933     // molecule.add_atom(L/2,L/2,  0, 9.0, 9);
934     // molecule.add_atom(L/2,  0,L/2, 9.0, 9);
935     // molecule.add_atom(  0,L/2,L/2, 9.0, 9);
936     // molecule.add_atom(L/2,  0,  0, 3.0, 3);
937     // molecule.add_atom(  0,L/2,  0, 3.0, 3);
938     // molecule.add_atom(  0,  0,L/2, 3.0, 3);
939     // molecule.add_atom(L/2,L/2,L/2, 3.0, 3);
940 
941     // Cubic cell for CaF2
942     // molecule.add_atom(0.00*L, 0.00*L, 0.00*L, 20.0, 20);
943     // molecule.add_atom(0.50*L, 0.50*L, 0.00*L, 20.0, 20);
944     // molecule.add_atom(0.50*L, 0.00*L, 0.50*L, 20.0, 20);
945     // molecule.add_atom(0.00*L, 0.50*L, 0.50*L, 20.0, 20);
946     // molecule.add_atom(0.25*L, 0.25*L, 0.25*L, 9.0, 9);
947     // molecule.add_atom(0.75*L, 0.75*L, 0.75*L, 9.0, 9);
948     // molecule.add_atom(0.75*L, 0.75*L, 0.25*L, 9.0, 9);
949     // molecule.add_atom(0.25*L, 0.25*L, 0.75*L, 9.0, 9);
950     // molecule.add_atom(0.75*L, 0.25*L, 0.75*L, 9.0, 9);
951     // molecule.add_atom(0.25*L, 0.75*L, 0.25*L, 9.0, 9);
952     // molecule.add_atom(0.25*L, 0.75*L, 0.75*L, 9.0, 9);
953     // molecule.add_atom(0.75*L, 0.25*L, 0.25*L, 9.0, 9);
954 
955     // Cubic cell for NaCl
956     molecule.add_atom(0.0*L, 0.0*L, 0.0*L, 11.0, 11);
957     molecule.add_atom(0.0*L, 0.5*L, 0.5*L, 11.0, 11);
958     molecule.add_atom(0.5*L, 0.0*L, 0.5*L, 11.0, 11);
959     molecule.add_atom(0.5*L, 0.5*L, 0.0*L, 11.0, 11);
960     molecule.add_atom(0.5*L, 0.5*L, 0.5*L, 17.0, 17);
961     molecule.add_atom(0.5*L, 0.0*L, 0.0*L, 17.0, 17);
962     molecule.add_atom(0.0*L, 0.5*L, 0.0*L, 17.0, 17);
963     molecule.add_atom(0.0*L, 0.0*L, 0.5*L, 17.0, 17);
964 
965     // Cubic cell for Si
966     //molecule.add_atom(  0,     0,     0,     14.0, 14);
967     //molecule.add_atom(  L/2,   L/2,   0,     14.0, 14);
968     //molecule.add_atom(  L/2,   0,     L/2,   14.0, 14);
969     //molecule.add_atom(  0,     L/2,   L/2,   14.0, 14);
970     //molecule.add_atom(  L/4,   L/4,   L/4,   14.0, 14);
971     //molecule.add_atom(  3*L/4, 3*L/4, L/4,   14.0, 14);
972     //molecule.add_atom(  3*L/4, L/4,   3*L/4, 14.0, 14);
973     //molecule.add_atom(  L/4,   3*L/4, 3*L/4, 14.0, 14);
974 
975     molecule.set_eprec(1e-3);
976 
977     // Load basis
978     aobasis.read_file("sto-3g");
979 
980     // Nuclear potential
981     real_function_3d vnuc = real_factory_3d(world).functor(real_functor_3d(new NuclearDensityFunctor(molecule, L))).truncate_mode(0).truncate_on_project();
982     double nuclear_charge=vnuc.trace();
983     if (world.rank() == 0) print("total nuclear charge", nuclear_charge);
984     vnuc = -1.0*make_coulomb_potential(world, vnuc);
985     vnuc.truncate();
986     int nst = int(molecule.total_nuclear_charge() + 0.1)/2;
987 
988     // Guess density
989     real_function_3d rho = real_factory_3d(world).functor(real_functor_3d(new MolecularGuessDensityFunctor(molecule,aobasis))).truncate_on_project();
990     rho.truncate();
991     double rhot = rho.trace();
992     if (world.rank() == 0) print("total guess charge", rhot);
993     rho.scale(molecule.total_nuclear_charge()/rhot);
994 
995     // Make AO basis functions
996     auto psi = initial_guess(world, vnuc, rho, kpoints, nst);
997 
998     for (int iter=0; iter<100; iter++) {
999         if (world.rank() == 0) print("\n\n  Iteration",iter,"\n");
1000         auto v = vnuc + make_coulomb_potential(world,rho) + make_lda_potential(world,rho);
1001         truncate(world, psi);
1002         auto vpsi = apply_potential(world, v, psi);
1003 
1004         vector_complex_function_3d new_psi(nst*nkpt);
1005         vector_complex_function_3d rm(nst*nkpt);
1006         for (int ik = 0; ik < nkpt; ++ik) {
1007             vector_complex_function_3d psik(psi.begin()+ik*nst, psi.begin()+(ik+1)*nst);
1008             vector_complex_function_3d vpsik(vpsi.begin()+ik*nst, vpsi.begin()+(ik+1)*nst);
1009 
1010             auto ke_mat = make_kinetic_matrix(world, psik, kpoints[ik]);
1011             auto pe_mat = matrix_inner(world, psik, vpsik, true);
1012             auto ov_mat = matrix_inner(world, psik, psik, true);
1013 
1014             auto fock = ke_mat + pe_mat;
1015             // eliminate small off-diagonal elements and lift diagonal
1016             // degeneracies to reduce random mixing
1017             for (unsigned int i=0; i<psik.size(); i++) {
1018                 fock(i,i) += i*thresh*1e-2;
1019                 for (unsigned int j=0; j<i; j++) {
1020                     if (std::abs(fock(i,j)) < thresh*1e-1 || std::abs(ov_mat(i,j)) < thresh*1e-1) {
1021                         fock(i,j) = fock(j,i) = 0.0;
1022                         ov_mat(i,j) = ov_mat(j,i) = 0.0;
1023                     }
1024                 }
1025             }
1026 
1027             tensor_complex c;
1028             tensor_real e;
1029             sygv(fock, ov_mat, 1, c, e);
1030             if (world.rank() == 0) print("main() ik = ", ik);
1031             if (world.rank() == 0) print(e);
1032 
1033             // if (world.rank() == 0) print("fock: ");
1034             // if (world.rank() == 0) print(fock);
1035             // if (world.rank() == 0) print("overlap: ");
1036             // if (world.rank() == 0) print(ov_mat);
1037             // if (world.rank() == 0) print("eigenvectors: ");
1038             // if (world.rank() == 0) print(real(c));
1039             // if (world.rank() == 0) print(imag(c));
1040             fixphases(world, e, c);
1041 
1042             psik = transform(world, psik, c);
1043             vpsik = transform(world, vpsik, c);
1044             vcpairT pair_update = update(world, ik, psik, vpsik, kpoints[ik], v, e);
1045             vector_complex_function_3d new_psik = pair_update.first;
1046             vector_complex_function_3d rmk = pair_update.second;
1047 
1048             //subspace[ik].update_subspace(world, new_psik, psik, rmk);
1049             auto damp = 0.3;
1050             gaxpy(world,damp,psik,(1.0-damp),new_psik);
1051 
1052             //orthogonalize(world,psik);
1053             //truncate(world,psik);
1054 
1055             truncate(world,psik);
1056             orthogonalize(world,psik);
1057 
1058             for (int ist = 0; ist < nst; ist++) {
1059                 psi[ik*nst+ist] = psik[ist];
1060             }
1061         }
1062 
1063         if (world.rank() == 0) print(nkpt,nst,psi.size());
1064         MADNESS_ASSERT(nkpt*nst == (int) psi.size());
1065 
1066         if (iter == 20) {
1067           if (world.rank() == 0) print("reprojecting ..");
1068             vnuc = madness::project(vnuc, kwavelet+2, thresh*1e-2, true);
1069             rho = madness::project(rho, kwavelet+2, thresh*1e-2, true);
1070           for (unsigned int i = 0; i < psi.size(); i++) {
1071             FunctionDefaults<3>::set_k(kwavelet+2);
1072             FunctionDefaults<3>::set_thresh(thresh*1e-2);
1073             psi[i] = madness::project(psi[i], kwavelet+2, thresh*1e-2, true);
1074           }
1075           if (world.rank() == 0) print("done reprojecting ..");
1076         }
1077         if (iter == 30) {
1078           if (world.rank() == 0) print("reprojecting ..");
1079             vnuc = madness::project(vnuc, kwavelet+4, thresh*1e-4, true);
1080             rho = madness::project(rho, kwavelet+4, thresh*1e-4, true);
1081           for (unsigned int i = 0; i < psi.size(); i++) {
1082             FunctionDefaults<3>::set_k(kwavelet+4);
1083             FunctionDefaults<3>::set_thresh(thresh*1e-4);
1084             psi[i] = madness::project(psi[i], kwavelet+4, thresh*1e-4, true);
1085           }
1086           if (world.rank() == 0) print("done reprojecting ..");
1087         }
1088         if (iter == 50) {
1089           if (world.rank() == 0) print("reprojecting ..");
1090             vnuc = madness::project(vnuc, kwavelet+6, thresh*1e-6, true);
1091             rho = madness::project(rho, kwavelet+6, thresh*1e-6, true);
1092           for (unsigned int i = 0; i < psi.size(); i++) {
1093             FunctionDefaults<3>::set_k(kwavelet+6);
1094             FunctionDefaults<3>::set_thresh(thresh*1e-6);
1095             psi[i] = madness::project(psi[i], kwavelet+6, thresh*1e-6, true);
1096           }
1097           if (world.rank() == 0) print("done reprojecting ..");
1098         }
1099 
1100 
1101         auto rho_new = make_density(world, psi, weight);
1102         double rdiff = (rho-rho_new).norm2();
1103         double s = 0.3;
1104         rho = s*rho + (1.0-s)*rho_new;
1105         if (world.rank() == 0) printf("electron density difference:  %15.8e\n\n", rdiff);
1106     }
1107     return 0;
1108 }
1109