1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2008 The Regents of the University of California
4 //
5 // This file is part of Qbox
6 //
7 // Qbox is distributed under the terms of the GNU General Public License
8 // as published by the Free Software Foundation, either version 2 of
9 // the License, or (at your option) any later version.
10 // See the file COPYING in the root directory of this distribution
11 // or <http://www.gnu.org/licenses/>.
12 //
13 ////////////////////////////////////////////////////////////////////////////////
14 //
15 // BOSampleStepper.cpp
16 //
17 ////////////////////////////////////////////////////////////////////////////////
18 
19 #include "BOSampleStepper.h"
20 #include "EnergyFunctional.h"
21 #include "SlaterDet.h"
22 #include "Basis.h"
23 #include "WavefunctionStepper.h"
24 #include "SDWavefunctionStepper.h"
25 #include "PSDWavefunctionStepper.h"
26 #include "PSDAWavefunctionStepper.h"
27 #include "JDWavefunctionStepper.h"
28 #include "UserInterface.h"
29 #include "Preconditioner.h"
30 #include "SDIonicStepper.h"
31 #include "SDAIonicStepper.h"
32 #include "CGIonicStepper.h"
33 #include "ANDIonicStepper.h"
34 #include "MDIonicStepper.h"
35 #include "BMDIonicStepper.h"
36 #include "SDCellStepper.h"
37 #include "CGCellStepper.h"
38 #include "AndersonMixer.h"
39 #include "MLWFTransform.h"
40 #include "D3tensor.h"
41 #include "cout0.h"
42 
43 #include <iostream>
44 #include <iomanip>
45 using namespace std;
46 
47 ////////////////////////////////////////////////////////////////////////////////
BOSampleStepper(Sample & s,int nitscf,int nite)48 BOSampleStepper::BOSampleStepper(Sample& s, int nitscf, int nite) :
49   SampleStepper(s), cd_(s.wf), ef_(s,cd_),
50   dwf(s.wf), nitscf_(nitscf), nite_(nite),
51   update_density_first_(true), update_vh_(true), update_vxc_(true) {}
52 
53 ////////////////////////////////////////////////////////////////////////////////
~BOSampleStepper()54 BOSampleStepper::~BOSampleStepper()
55 {
56   for ( TimerMap::iterator i = tmap.begin(); i != tmap.end(); i++ )
57   {
58     double time = i->second.real();
59     double tmin, tmax;
60     MPI_Reduce(&time,&tmin,1,MPI_DOUBLE,MPI_MIN,0,MPIdata::comm());
61     MPI_Reduce(&time,&tmax,1,MPI_DOUBLE,MPI_MAX,0,MPIdata::comm());
62     if ( MPIdata::onpe0() && (tmax > 0.0) )
63     {
64       string s = "name=\"" + i->first + "\"";
65       cout << "<timing " << left << setw(22) << s
66            << " min=\"" << setprecision(3) << tmin << "\""
67            << " max=\"" << setprecision(3) << tmax << "\"/>"
68            << endl;
69     }
70   }
71 }
72 
73 ////////////////////////////////////////////////////////////////////////////////
initialize_density(void)74 void BOSampleStepper::initialize_density(void)
75 {
76   // initialize cd_ with a sum of atomic densities
77 
78   double atom_radius[] =
79   {
80     0.000, 1.542, 0.931, 1.727, 1.636, // null H  He  Li  Be
81     1.845, 1.538, 1.311, 1.141, 1.014, //  B   C   N   O   F
82     0.983, 1.238, 1.347, 1.376, 2.151, // Ne  Na  Mg  Al  Si
83     1.927, 1.733, 1.563, 1.429, 1.546, //  P   S  Cl  Ar   K
84     1.663, 1.604, 1.516, 1.434, 1.377, // Ca  Sc  Ti   V  Cr
85     1.310, 1.245, 1.201, 1.162, 1.098, // Mn  Fe  Co  Ni  Cu
86     1.077, 1.331, 1.415, 2.015, 1.880, // Zn  Ga  Ge  As  Se
87     1.749, 1.630, 1.705, 1.819, 1.794, // Br  Kr  Rb  Sr   Y
88     1.728, 1.664, 1.589, 1.523, 1.461, // Zr  Nb  Mo  Tc  Ru
89     1.410, 1.348, 1.306, 1.303, 1.554, // Rh  Pd  Ag  Cd  In
90     1.609, 1.611, 1.530, 1.514, 1.464, // Sn  Sb  Te   I  Xe
91     1.946, 1.967, 1.943, 1.930, 1.920, // Cs  Ba  La  Ce  Pr
92     1.910, 1.900, 1.890, 1.880, 1.870, // Nd  Pm  Sm  Eu  Gd
93     1.860, 1.850, 1.840, 1.830, 1.820, // Tb  Dy  Ho  Er  Tm
94     1.810, 1.800, 1.701, 1.658, 1.606, // Yb  Lu  Hf  Ta   W
95     1.550, 1.500, 1.446, 1.398, 1.355, // Re  Os  Ir  Pt  Au
96     1.314, 1.624, 1.659, 1.634, 1.620, // Hg  Tl  Pb  Bi  Po
97     1.600, 1.500, 1.600, 1.600, 1.600, // At  Rn  Fr  Ra  Ac
98     1.600, 1.600, 1.600, 1.600, 1.600, // Th  Pa   U  Np  Pu
99     1.600, 1.600, 1.600, 1.600, 1.600, // Am  Cm  Bk  Cf  Es
100     1.600, 1.600, 1.600, 1.600         // Fm  Md  No  Lr
101   };
102 
103   const AtomSet& atoms = s_.atoms;
104   const Basis* const vbasis = cd_.vbasis();
105   const int ngloc = vbasis->localsize();
106   vector<vector<complex<double> > > rhops;
107   const int nsp = atoms.nsp();
108   rhops.resize(nsp);
109   vector<complex<double> > rhopst(ngloc);
110   const double * const g2 = vbasis->g2_ptr();
111 
112   for ( int is = 0; is < nsp; is++ )
113   {
114     rhops[is].resize(ngloc);
115     Species *s = atoms.species_list[is];
116     const int zval = s->zval();
117     const int atomic_number = s->atomic_number();
118     assert(atomic_number < sizeof(atom_radius)/sizeof(double));
119     double rc = atom_radius[atomic_number];
120     for ( int ig = 0; ig < ngloc; ig++ )
121     {
122       double arg = 0.25 * rc * rc * g2[ig];
123       rhops[is][ig] = zval * exp( -arg );
124     }
125   }
126 
127   vector<vector<double> > tau0;
128   tau0.resize(nsp);
129   for ( int is = 0; is < nsp; is++ )
130     tau0.resize(3*atoms.na(is));
131   atoms.get_positions(tau0);
132   StructureFactor sf;
133   sf.init(tau0,*vbasis);
134   sf.update(tau0,*vbasis);
135 
136   memset( (void*)&rhopst[0], 0, 2*ngloc*sizeof(double) );
137   for ( int is = 0; is < nsp; is++ )
138   {
139     complex<double> *s = &sf.sfac[is][0];
140     for ( int ig = 0; ig < ngloc; ig++ )
141     {
142       const complex<double> sg = s[ig];
143       rhopst[ig] += sg * rhops[is][ig];
144     }
145   }
146 
147   // Adjust G=0 component of the charge if net_charge is non-zero
148   rhopst[0] += s_.wf.nel() - atoms.nel();
149 
150   // Initialize charge equally for both spins
151   cd_.rhog[0] = rhopst;
152   if ( cd_.rhog.size() == 2 )
153   {
154     assert(cd_.rhog[0].size()==cd_.rhog[1].size());
155     for ( int i = 0; i < cd_.rhog[0].size(); i++ )
156     {
157       cd_.rhog[0][i] = 0.5 * rhopst[i];
158       cd_.rhog[1][i] = 0.5 * rhopst[i];
159     }
160   }
161   cd_.update_rhor();
162   update_density_first_ = false;
163 }
164 
165 ////////////////////////////////////////////////////////////////////////////////
step(int niter)166 void BOSampleStepper::step(int niter)
167 {
168   const bool anderson_charge_mixing = ( s_.ctrl.charge_mix_ndim > 0 );
169 
170   // determine whether eigenvectors must be computed
171   // eigenvectors are computed if explicitly requested with wf_diag==T
172   // or if the SlaterDet has fractionally occupied states
173   const bool fractional_occ = (s_.wf.nel() != 2 * s_.wf.nst());
174   const bool compute_eigvec = fractional_occ || s_.ctrl.wf_diag == "T";
175   const bool compute_mlwf = s_.ctrl.wf_diag == "MLWF";
176   const bool compute_mlwfc = s_.ctrl.wf_diag == "MLWFC";
177   enum ortho_type { GRAM, LOWDIN, ORTHO_ALIGN, RICCATI };
178 
179   const double gpa = 29421.5;
180 
181   AtomSet& atoms = s_.atoms;
182   Wavefunction& wf = s_.wf;
183   Wavefunction*& wfv = s_.wfv;
184   const bool onpe0 = MPIdata::onpe0();
185   const Context& sd_ctxt = wf.sd_context();
186 
187   const int nspin = wf.nspin();
188 
189   const double dt = s_.ctrl.dt;
190 
191   const string wf_dyn = s_.ctrl.wf_dyn;
192   const string atoms_dyn = s_.ctrl.atoms_dyn;
193   const string cell_dyn = s_.ctrl.cell_dyn;
194 
195   const bool extrapolate_wf = ( atoms_dyn == "MD" );
196 
197   bool ntc_extrapolation = false;
198   bool asp_extrapolation = false;
199 
200   const map<string,string>& debug_map = s_.ctrl.debug;
201 
202   map<string,string>::const_iterator imap =
203     debug_map.find("EXTRAPOLATION");
204   if ( imap != debug_map.end() )
205   {
206     const string val = imap->second;
207     if ( val == "NTC" ) ntc_extrapolation = true;
208     if ( val == "ASP" ) asp_extrapolation = true;
209   }
210 
211   Wavefunction* wfmm;
212   if ( extrapolate_wf && ( ntc_extrapolation || asp_extrapolation ) )
213     wfmm = new Wavefunction(wf);
214 
215   // Next lines: special value of niter = 0: GS calculation only
216   const bool atoms_move = ( niter > 0 && atoms_dyn != "LOCKED" );
217   const bool compute_stress = ( s_.ctrl.stress == "ON" );
218   const bool cell_moves = ( niter > 0 && compute_stress &&
219                             cell_dyn != "LOCKED" );
220   // GS-only calculation:
221   const bool gs_only = !atoms_move && !cell_moves;
222 
223   const double force_tol = s_.ctrl.force_tol;
224   const double stress_tol = s_.ctrl.stress_tol;
225 
226   Timer tm_iter;
227 
228   Preconditioner prec(wf,ef_,s_.ctrl.ecutprec);
229 
230   WavefunctionStepper* wf_stepper = 0;
231   if ( wf_dyn == "SD" )
232   {
233     const double emass = s_.ctrl.emass;
234     double dt2bye = (emass == 0.0) ? 0.5 / wf.ecut() : dt*dt/emass;
235 
236     // divide dt2bye by facs coefficient if stress == ON
237     const double facs = 2.0;
238     if ( s_.ctrl.stress == "ON" )
239     {
240       dt2bye /= facs;
241     }
242     wf_stepper = new SDWavefunctionStepper(wf,dt2bye,tmap);
243   }
244   else if ( wf_dyn == "PSD" )
245     wf_stepper = new PSDWavefunctionStepper(wf,prec,tmap);
246   else if ( wf_dyn == "PSDA" )
247     wf_stepper = new PSDAWavefunctionStepper(wf,prec,tmap);
248   else if ( wf_dyn == "JD" )
249     wf_stepper = new JDWavefunctionStepper(wf,prec,ef_,tmap);
250 
251   // wf_stepper == 0 indicates that wf_dyn == LOCKED
252 
253   IonicStepper* ionic_stepper = 0;
254   if ( atoms_dyn == "SD" )
255     ionic_stepper = new SDIonicStepper(s_);
256   else if ( atoms_dyn == "SDA" )
257     ionic_stepper = new SDAIonicStepper(s_);
258   else if ( atoms_dyn == "CG" )
259     ionic_stepper = new CGIonicStepper(s_);
260   else if ( atoms_dyn == "AND" )
261     ionic_stepper = new ANDIonicStepper(s_);
262   else if ( atoms_dyn == "MD" )
263     ionic_stepper = new MDIonicStepper(s_);
264   else if ( atoms_dyn == "BMD" )
265     ionic_stepper = new BMDIonicStepper(s_);
266 
267   if ( ionic_stepper )
268     ionic_stepper->setup_constraints();
269 
270   CellStepper* cell_stepper = 0;
271   if ( cell_dyn == "SD" )
272     cell_stepper = new SDCellStepper(s_);
273   else if ( cell_dyn == "CG" )
274     cell_stepper = new CGCellStepper(s_);
275 
276   // Allocate wavefunction velocity if not available
277   if ( atoms_move && extrapolate_wf )
278   {
279     if ( wfv == 0 )
280     {
281       wfv = new Wavefunction(wf);
282       wfv->clear();
283     }
284   }
285 
286   vector<MLWFTransform*> mlwft(wf.nsp_loc());
287 
288   if ( compute_mlwf || compute_mlwfc )
289   {
290     // MLWF can be computed at the gamma point only
291     // There must be a single k-point, and it must be gamma
292     if ( wf.nkp() > 1 || ( wf.nkp()==1 && wf.kpoint(0) != D3vector(0,0,0) ) )
293     {
294       if ( onpe0 )
295       {
296         cout << " BOSampleStepper::step: MLWF can be computed at k=0 only"
297              << endl;
298         cout << " BOSampleStepper::step: cannot run" << endl;
299       }
300       return;
301     }
302 
303     for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
304     {
305       const int ikp = 0;
306       const int ikp_loc = wf.ikp_local(ikp);
307       if ( ikp_loc >= 0 )
308         mlwft[isp_loc] = new MLWFTransform(*wf.sd(isp_loc,ikp_loc));
309     }
310   }
311 
312   // Charge mixing variables: include both spins in the same vector
313   if ( nspin > 1 ) assert(cd_.rhog[0].size()==cd_.rhog[1].size());
314   const int ng = cd_.rhog[0].size();
315   vector<complex<double> > rhog_in(nspin*ng), drhog(nspin*ng),
316     rhobar(nspin*ng), drhobar(nspin*ng);
317   const int anderson_ndim = s_.ctrl.charge_mix_ndim;
318   vector<double> wkerker(ng), wls(ng);
319 
320   // Anderson charge mixer: include both spins in the same vector
321   // Factor of 2: complex coeffs stored as double
322   MPI_Comm vcomm = MPIdata::g_comm();
323   AndersonMixer mixer(2*nspin*ng,anderson_ndim,&vcomm);
324 
325   // compute Kerker preconditioning
326   // real space Kerker cutoff in a.u.
327   const double rc_Kerker = s_.ctrl.charge_mix_rcut;
328   const double *const g2 = cd_.vbasis()->g2_ptr();
329 
330   // define q1 cutoff for row weighting of LS charge mixing
331   // Use rc1 = 3 a.u. default cutoff
332   double rc1 = 3.0;
333   // check if override from the debug map
334   // use: set debug RC1 <value>
335   imap = debug_map.find("RC1");
336   if ( imap != debug_map.end() )
337   {
338     const string val = imap->second;
339     istringstream is(val);
340     is >> rc1;
341     if ( onpe0 )
342       cout << " override rc1 value: rc1 = " << rc1 << endl;
343     assert(rc1 >= 0.0);
344   }
345 
346   double delta_ratio = 0.01;
347   imap = debug_map.find("DELTA_RATIO");
348   if ( imap != debug_map.end() )
349   {
350     const string val = imap->second;
351     istringstream is(val);
352     is >> delta_ratio;
353     if ( onpe0 )
354       cout << " override delta_ratio value = " << delta_ratio << endl;
355     assert(delta_ratio >= 0.0);
356   }
357 
358   if ( rc1 != 0.0 )
359   {
360     const double q1 = 2.0 * M_PI / rc1;
361     for ( int i = 0; i < wls.size(); i++ )
362     {
363       if ( g2[i] != 0.0 )
364         wls[i] = sqrt(g2[i] / ( g2[i] + q1*q1 ));
365       else
366         wls[i] = 1.0;
367     }
368   }
369   else
370   {
371     for ( int i = 0; i < wls.size(); i++ )
372       wls[i] = 1.0;
373   }
374 
375   if ( rc_Kerker > 0.0 )
376   {
377     const double q0_kerker = 2 * M_PI / rc_Kerker;
378     const double q0_kerker2 = q0_kerker * q0_kerker;
379     for ( int i = 0; i < wkerker.size(); i++ )
380     {
381       if ( g2[i] != 0.0 )
382         wkerker[i] = g2[i] / ( g2[i] + q0_kerker2 );
383       else
384         wkerker[i] = 1.0;
385     }
386   }
387   else
388   {
389     for ( int i = 0; i < wkerker.size(); i++ )
390       wkerker[i] = 1.0;
391   }
392 
393   if ( onpe0 )
394     cout << "<net_charge> " << atoms.nel()-wf.nel() << " </net_charge>\n";
395 
396   // Next line: special case of niter=0: compute GS only
397   bool iter_done = false;
398   for ( int iter = 0; iter < max(niter,1) && !iter_done; iter++ )
399   {
400     // ionic iteration
401 
402     tm_iter.start();
403 
404     if ( onpe0 )
405       cout << "<iteration count=\"" << iter+1 << "\">\n";
406 
407     // compute energy and ionic forces using existing wavefunction
408     double maxforce = 0.0;
409     double maxstress = 0.0;
410 
411     if ( !gs_only )
412     {
413       tmap["charge"].start();
414       cd_.update_density();
415       tmap["charge"].stop();
416 
417       tmap["update_vhxc"].start();
418       ef_.update_vhxc(compute_stress);
419       tmap["update_vhxc"].stop();
420       const bool compute_forces = true;
421       tmap["energy"].start();
422       double energy =
423         ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
424       tmap["energy"].stop();
425       double enthalpy = ef_.enthalpy();
426 
427       if ( force_tol > 0.0 )
428       {
429         maxforce = 0.0;
430         for ( int is = 0; is < fion.size(); is++ )
431           for ( int i = 0; i < fion[is].size(); i++ )
432             maxforce = max(maxforce, fabs(fion[is][i]));
433       }
434 
435       if ( stress_tol > 0.0 )
436       {
437         compute_sigma();
438         for ( int i = 0; i < sigma.size(); i++ )
439           maxstress = max(maxstress, gpa*fabs(sigma[i]));
440       }
441 
442       if ( onpe0 )
443       {
444         cout << cd_;
445         cout << ef_;
446         if ( ef_.el_enth() )
447           cout << *ef_.el_enth();
448       }
449 
450       if ( iter > 0 && ionic_stepper )
451       {
452         ionic_stepper->compute_v(energy,fion);
453       }
454       // at this point, positions r0, velocities v0 and forces fion are
455       // consistent
456 
457       // execute commands in iter_cmd if defined
458       if ( !iter_cmd_.empty() )
459       {
460         if ( iter % iter_cmd_period_ == 0 )
461         {
462           // copy positions and velocities from IonicStepper to AtomSet
463           if ( ionic_stepper )
464           {
465             ionic_stepper->set_positions();
466             ionic_stepper->set_velocities();
467           }
468           // command must be terminated with \n
469           istringstream cmdstream(iter_cmd_ + "\n");
470           s_.ui->processCmds(cmdstream,"[iter_cmd]",true);
471           // copy positions and velocities back from AtomSet
472           if ( ionic_stepper )
473           {
474             ionic_stepper->get_positions();
475             ionic_stepper->get_velocities();
476           }
477         }
478       }
479 
480       double ekin_ion = 0.0, temp_ion = 0.0;
481       if ( ionic_stepper )
482       {
483         ekin_ion = ionic_stepper->ekin();
484         temp_ion = ionic_stepper->temp();
485       }
486 
487       // print positions, velocities and forces at time t0
488       if ( onpe0 )
489       {
490         cout << "<atomset>" << endl;
491         cout << atoms.cell();
492         for ( int is = 0; is < atoms.atom_list.size(); is++ )
493         {
494           int i = 0;
495           for ( int ia = 0; ia < atoms.atom_list[is].size(); ia++ )
496           {
497             Atom* pa = atoms.atom_list[is][ia];
498             cout << "  <atom name=\"" << pa->name() << "\""
499                  << " species=\"" << pa->species()
500                  << "\">\n"
501                  << "    <position> " << pa->position() << " </position>\n"
502                  << "    <velocity> " << pa->velocity() << " </velocity>\n"
503                  << "    <force> "
504                  << fion[is][i] << " "
505                  << fion[is][i+1] << " "
506                  << fion[is][i+2]
507                  << " </force>\n";
508             cout << "  </atom>" << endl;
509             i += 3;
510           }
511         }
512         cout << "</atomset>" << endl;
513         cout << setprecision(6);
514         cout << "<unit_cell_a_norm> " << atoms.cell().a_norm(0)
515              << " </unit_cell_a_norm>" << endl;
516         cout << "<unit_cell_b_norm> " << atoms.cell().a_norm(1)
517              << " </unit_cell_b_norm>" << endl;
518         cout << "<unit_cell_c_norm> " << atoms.cell().a_norm(2)
519              << " </unit_cell_c_norm>" << endl;
520         cout << setprecision(3) << "<unit_cell_alpha>  "
521              << atoms.cell().alpha() << " </unit_cell_alpha>" << endl;
522         cout << setprecision(3) << "<unit_cell_beta>   "
523              << atoms.cell().beta() << " </unit_cell_beta>" << endl;
524         cout << setprecision(3) << "<unit_cell_gamma>  "
525              << atoms.cell().gamma() << " </unit_cell_gamma>" << endl;
526         cout << setprecision(3) << "<unit_cell_volume> "
527              << atoms.cell().volume() << " </unit_cell_volume>" << endl;
528 
529         // include the kinetic energy of the stepper
530         // e.g. to include thermostat contributions
531         double ekin_stepper;
532         if ( ionic_stepper != 0 )
533           ekin_stepper = ionic_stepper->ekin_stepper();
534         cout << setprecision(8);
535         cout << "  <econst> " << enthalpy+ekin_ion+ekin_stepper
536              << " </econst>\n";
537         cout << "  <ekin_ion> " << ekin_ion << " </ekin_ion>\n";
538         cout << "  <temp_ion> " << temp_ion << " </temp_ion>\n";
539       }
540 
541       if ( atoms_move )
542       {
543         if ( s_.constraints.size() > 0 )
544         {
545           s_.constraints.update_constraints(dt);
546           s_.constraints.compute_forces(ionic_stepper->r0(), fion);
547           if ( onpe0 )
548           {
549             s_.constraints.list_constraints(cout);
550           }
551         }
552         // move atoms to new position: r0 <- r0 + v0*dt + dt2/m * fion
553         ionic_stepper->compute_r(energy,fion);
554         ef_.atoms_moved();
555       }
556 
557       if ( compute_stress )
558       {
559         compute_sigma();
560         print_stress();
561 
562         if ( cell_moves )
563         {
564           cell_stepper->compute_new_cell(enthalpy,sigma,fion);
565 
566           // Update cell
567           cell_stepper->update_cell();
568 
569           ef_.cell_moved();
570           ef_.atoms_moved(); // modifications of the cell also move ions
571         }
572       }
573     } // if !gs_only
574 
575     // Recalculate ground state wavefunctions
576     // wavefunction extrapolation
577     if ( atoms_move && extrapolate_wf )
578     {
579       for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
580       {
581         for ( int ikp_loc = 0; ikp_loc < wf.nkp_loc(); ++ikp_loc )
582         {
583           if ( ntc_extrapolation )
584           {
585             SlaterDet* sd = wf.sd(isp_loc,ikp_loc);
586             SlaterDet* sdv = wfv->sd(isp_loc,ikp_loc);
587             SlaterDet* sdmm = wfmm->sd(isp_loc,ikp_loc);
588             double* c = (double*) sd->c().cvalptr();
589             double* cv = (double*) sdv->c().cvalptr();
590             double* cmm = (double*) sdmm->c().cvalptr();
591             const int mloc = sd->c().mloc();
592             const int nloc = sd->c().nloc();
593             const int len = 2*mloc*nloc;
594             if ( iter == 0 )
595             {
596               // copy c on cv
597               for ( int i = 0; i < len; i++ )
598               {
599                 const double x = c[i];
600                 const double v = cv[i];
601                 // extrapolation using velocity in cv
602                 c[i] = x + dt * v;
603                 cv[i] = x;
604               }
605               tmap["gram"].start();
606               sd->gram();
607               tmap["gram"].stop();
608             }
609             else if ( iter == 1 )
610             {
611               sdv->align(*sd);
612               for ( int i = 0; i < len; i++ )
613               {
614                 const double x = c[i];
615                 const double xm = cv[i];
616                 c[i] = 2.0 * x - xm;
617                 cv[i] = x;
618                 cmm[i] = xm;
619               }
620               tmap["gram"].start();
621               sd->gram();
622               tmap["gram"].stop();
623             }
624             else
625             {
626               // align wf with wfmm before extrapolation
627               sd->align(*sdmm);
628 
629               // extrapolate
630               for ( int i = 0; i < len; i++ )
631               {
632                 const double x = c[i];   // current wf (scf converged) at t
633                 const double xm = cv[i]; // extrapolated wf at t
634                 const double xmm = cmm[i]; // extrapolated wf at t-dt
635                 c[i] = 2.0 * x - xmm;
636                 // save extrapolated value at t in cmm
637                 cmm[i] = xm;
638               }
639               // orthogonalize the extrapolated value
640               tmap["gram"].start();
641               sd->gram();
642               tmap["gram"].stop();
643 
644               // c[i] now contains the extrapolated value
645               // save a copy in cv[i]
646               for ( int i = 0; i < len; i++ )
647               {
648                 cv[i] = c[i];
649               }
650             }
651             // c[i] is now ready for electronic iterations
652           }
653           else if ( asp_extrapolation )
654           {
655             SlaterDet* sd = wf.sd(isp_loc,ikp_loc);
656             SlaterDet* sdv = wfv->sd(isp_loc,ikp_loc);
657             SlaterDet* sdmm = wfmm->sd(isp_loc,ikp_loc);
658             double* c = (double*) sd->c().cvalptr();
659             double* cv = (double*) sdv->c().cvalptr();
660             double* cmm = (double*) sdmm->c().cvalptr();
661             const int mloc = s_.wf.sd(isp_loc,ikp_loc)->c().mloc();
662             const int nloc = s_.wf.sd(isp_loc,ikp_loc)->c().nloc();
663             const int len = 2*mloc*nloc;
664             if ( iter == 0 )
665             {
666               for ( int i = 0; i < len; i++ )
667               {
668                 const double x = c[i];
669                 const double v = cv[i];
670                 // extrapolation using velocity in cv
671                 c[i] = x + dt * v;
672                 cv[i] = x;
673               }
674               tmap["gram"].start();
675               sd->gram();
676               tmap["gram"].stop();
677             }
678             else if ( iter == 1 )
679             {
680               sdv->align(*sd);
681               for ( int i = 0; i < len; i++ )
682               {
683                 const double x = c[i];
684                 const double xm = cv[i];
685                 c[i] = 2.0 * x - xm;
686                 cv[i] = x;
687                 cmm[i] = xm;
688               }
689               tmap["gram"].start();
690               sd->gram();
691               tmap["gram"].stop();
692             }
693             else
694             {
695               // align wf with wfmm before extrapolation
696               sd->align(*sdmm);
697 
698               // extrapolate
699               for ( int i = 0; i < len; i++ )
700               {
701                 const double x = c[i];   // current wf (scf converged) at t
702                 const double xm = cv[i]; // extrapolated wf at t
703                 const double xmm = cmm[i]; // extrapolated wf at t-dt
704                 const double asp_a1 = 0.5;
705                 c[i] = 2.0 * x - xm +
706                        asp_a1 * ( x - 2.0 * xm + xmm );
707                 //c[i] = 2.5 * x - 2.0 * xm + 0.5 * xmm;
708                 cmm[i] = xm;
709                 cv[i] = x;
710               }
711               // orthogonalize the extrapolated value
712               tmap["gram"].start();
713               sd->gram();
714               tmap["gram"].stop();
715 
716               // c[i] now contains the extrapolated value
717             }
718             // c[i] is now ready for electronic iterations
719           }
720           else // normal extrapolation
721           {
722             SlaterDet* sd = wf.sd(isp_loc,ikp_loc);
723             SlaterDet* sdv = wfv->sd(isp_loc,ikp_loc);
724             double* c = (double*) sd->c().cvalptr();
725             double* cv = (double*) sdv->c().cvalptr();
726             const int mloc = s_.wf.sd(isp_loc,ikp_loc)->c().mloc();
727             const int nloc = s_.wf.sd(isp_loc,ikp_loc)->c().nloc();
728             const int len = 2*mloc*nloc;
729             if ( iter == 0 )
730             {
731               // copy c to cv
732               for ( int i = 0; i < len; i++ )
733               {
734                 const double x = c[i];
735                 const double v = cv[i];
736                 c[i] = x + dt * v;
737                 cv[i] = x;
738               }
739               tmap["gram"].start();
740               sd->gram();
741               tmap["gram"].stop();
742             }
743             else
744             {
745               tmap["align"].start();
746               sdv->align(*sd);
747               tmap["align"].stop();
748 
749               // linear extrapolation
750               for ( int i = 0; i < len; i++ )
751               {
752                 const double x = c[i];
753                 const double xm = cv[i];
754                 c[i] = 2.0 * x - xm;
755                 cv[i] = x;
756               }
757               tmap["lowdin"].start();
758               sd->lowdin();
759               tmap["lowdin"].stop();
760             }
761           }
762         }
763       }
764     } // atoms_move && extrapolate_wf
765 
766     // do nitscf self-consistent iterations, each with nite electronic steps
767     if ( wf_stepper != 0 )
768     {
769       wf_stepper->preprocess();
770       if ( anderson_charge_mixing )
771         mixer.restart();
772 
773       double ehart, ehart_m;
774       double delta_ehart;
775       bool scf_converged = false;
776       int itscf = 0;
777       double etotal = 0.0, etotal_m = 0.0, etotal_mm = 0.0;
778 
779       while ( !scf_converged && itscf < nitscf_ )
780       {
781         if ( nite_ > 0 && onpe0 )
782           cout << "  BOSampleStepper: start scf iteration" << endl;
783 
784         // update charge density
785         tmap["charge"].start();
786         // The density is updated at the first step if update_density_first_
787         // is true.
788         // It is always updated after the first step
789         if ( ( update_density_first_ || itscf>0 ) )
790           cd_.update_density();
791         tmap["charge"].stop();
792 
793         if ( onpe0 )
794           cout << cd_;
795 
796         // charge mixing
797         if ( nite_ > 0 )
798         {
799           if ( itscf == 0 )
800           {
801             // at first scf iteration, nothing to mix
802             // memorize rhog in rhog_in for next iteration
803 
804             for ( int ispin = 0; ispin < nspin; ispin++ )
805               for ( int i = 0; i < ng; i++ )
806                 rhog_in[i+ng*ispin] = cd_.rhog[ispin][i];
807           }
808           else
809           {
810             // itscf > 0
811             // compute unscreened correction drhog
812             for ( int ispin = 0; ispin < nspin; ispin++ )
813             {
814               for ( int i = 0; i < ng; i++ )
815               {
816                 drhog[i+ng*ispin] = (cd_.rhog[ispin][i]-rhog_in[i+ng*ispin]);
817               }
818             }
819 
820             const double alpha = s_.ctrl.charge_mix_coeff;
821             // Anderson acceleration
822             if ( anderson_charge_mixing )
823             {
824               // row weighting of LS calculation
825               for ( int ispin = 0; ispin < nspin; ispin++ )
826               {
827                 for ( int i = 0; i < ng; i++ )
828                   drhog[i+ng*ispin] /= wls[i];
829               }
830 
831               if ( sd_ctxt.mycol() == 0 )
832               {
833                 // use AndersonMixer on first column only and bcast results
834                 mixer.update((double*)&rhog_in[0], (double*)&drhog[0],
835                              (double*)&rhobar[0], (double*)&drhobar[0]);
836                 const int n = 2*nspin*ng;
837                 sd_ctxt.dbcast_send('r',n,1,(double*)&rhobar[0],n);
838                 sd_ctxt.dbcast_send('r',n,1,(double*)&drhobar[0],n);
839               }
840               else
841               {
842                 const int n = 2*nspin*ng;
843                 sd_ctxt.dbcast_recv('r',n,1,(double*)&rhobar[0],n,-1,0);
844                 sd_ctxt.dbcast_recv('r',n,1,(double*)&drhobar[0],n,-1,0);
845               }
846 
847               for ( int ispin = 0; ispin < nspin; ispin++ )
848               {
849                 for ( int i = 0; i < ng; i++ )
850                   drhobar[i+ng*ispin] *= wls[i];
851                 for ( int i = 0; i < ng; i++ )
852                   rhog_in[i+ng*ispin] = rhobar[i+ng*ispin] +
853                     alpha * drhobar[i+ng*ispin] * wkerker[i];
854               }
855             }
856             else
857             {
858               for ( int ispin = 0; ispin < nspin; ispin++ )
859               {
860                 for ( int i = 0; i < ng; i++ )
861                   rhog_in[i+ng*ispin] += alpha * drhog[i+ng*ispin] * wkerker[i];
862               }
863             }
864 
865             for ( int ispin = 0; ispin < nspin; ispin++ )
866             {
867               for ( int i = 0; i < ng; i++ )
868                 cd_.rhog[ispin][i] = rhog_in[i+ng*ispin];
869             }
870             cd_.update_rhor();
871           }
872         } // if nite_ > 0
873 
874         // update vhxc:
875         // at first scf step:
876         // - update both vh and vxc
877         // at later steps:
878         // - update depending of values of update_vh_ and update_vxc_
879         tmap["update_vhxc"].start();
880         if ( itscf == 0 )
881           ef_.update_vhxc(compute_stress);
882         else
883           ef_.update_vhxc(compute_stress, update_vh_, update_vxc_);
884         tmap["update_vhxc"].stop();
885 
886         // reset stepper only if multiple non-selfconsistent steps
887         if ( nite_ > 0 ) wf_stepper->preprocess();
888 
889         // non-self-consistent loop
890         // repeat until the change in eigenvalue_sum is smaller than a
891         // fraction delta_ratio of the change in Hartree energy delta_ehart
892         // in the last scf iteration
893         bool nonscf_converged = false;
894         if ( itscf == 0 )
895         {
896           ehart = ef_.ehart();
897           delta_ehart = 0.0;
898         }
899         else if ( itscf == 1 )
900         {
901           ehart_m = ehart;
902           ehart = ef_.ehart();
903           delta_ehart = fabs(ehart - ehart_m);
904         }
905         else
906         {
907           // itscf > 1
908           // only allow decrease in delta_ehart
909           ehart_m = ehart;
910           ehart = ef_.ehart();
911           delta_ehart = min(delta_ehart,fabs(ehart - ehart_m));
912         }
913 #if DEBUG
914         if ( onpe0 )
915         {
916           cout << " BOSampleStepper::step: delta_ehart: "
917                << delta_ehart << endl;
918         }
919 #endif
920 
921         int ite = 0;
922         double etotal_int;
923 
924         double eigenvalue_sum, eigenvalue_sum_m = 0.0;
925         // if nite == 0: do 1 iteration, no screening in charge mixing
926         // if nite > 0: do nite iterations, use screening in charge mixing
927         //
928         while ( !nonscf_converged && ite < max(nite_,1) )
929         {
930           tmap["energy"].start();
931           ef_.energy(true,dwf,false,fion,false,sigma_eks);
932           tmap["energy"].stop();
933 
934           // compute the sum of eigenvalues (with fixed weight)
935           // to measure convergence of the subspace update
936           // compute trace of the Hamiltonian matrix Y^T H Y
937           // scalar product of Y and (HY): tr Y^T (HY) = sum_ij Y_ij (HY)_ij
938           // Note: since the hamiltonian is hermitian and dwf=H*wf
939           // the dot product in the following line is real
940 
941           if ( ite > 0 )
942             eigenvalue_sum_m = eigenvalue_sum;
943 
944           eigenvalue_sum = real(s_.wf.dot(dwf));
945           if ( onpe0 )
946             cout << "  <eigenvalue_sum>  " << setprecision(8)
947                  << eigenvalue_sum << " </eigenvalue_sum>" << endl;
948 
949           tmap["wf_update"].start();
950           wf_stepper->update(dwf);
951           tmap["wf_update"].stop();
952 
953           // compare delta_eig_sum only after first iteration
954           if ( ite > 0 )
955           {
956             double delta_eig_sum = fabs(eigenvalue_sum - eigenvalue_sum_m);
957             nonscf_converged |= (delta_eig_sum < delta_ratio * delta_ehart);
958 #if DEBUG
959             if ( onpe0 )
960             {
961               cout << " BOSampleStepper::step delta_eig_sum: "
962                    << delta_eig_sum << endl;
963             }
964 #endif
965           }
966           ite++;
967         }
968 
969         // subspace diagonalization
970         if ( compute_eigvec || s_.ctrl.wf_diag == "EIGVAL" )
971         {
972           tmap["energy"].start();
973           ef_.energy(true,dwf,false,fion,false,sigma_eks);
974           tmap["energy"].stop();
975           tmap["wfdiag"].start();
976           s_.wf.diag(dwf,compute_eigvec);
977           tmap["wfdiag"].stop();
978           // print eigenvalues
979           if ( MPIdata::onpe0() )
980             cout << "<eigenset>" << endl;
981           for ( int ispin = 0; ispin < wf.nspin(); ++ispin )
982           {
983             const int isp_loc = wf.isp_local(ispin);
984             for ( int ikp = 0; ikp < wf.nkp(); ++ikp )
985             {
986               const int ikp_loc = wf.ikp_local(ikp);
987               ostringstream ostr;
988 	      ostr.setf(ios::fixed,ios::floatfield);
989 	      ostr.setf(ios::right,ios::adjustfield);
990               int isrc = -1;
991               if ( ( isp_loc >= 0 ) && ( ikp_loc >= 0 ) )
992               {
993                 if ( MPIdata::sd_rank() == 0 )
994                 {
995                   ostr.str("");
996                   isrc = MPIdata::rank();
997                   const int nst = wf.sd(isp_loc,ikp_loc)->nst();
998                   const double eVolt = 2.0 * 13.6058;
999                   ostr <<    "  <eigenvalues spin=\"" << ispin
1000                        << "\" kpoint=\""
1001                        << setprecision(8)
1002                        << wf.sd(isp_loc,ikp_loc)->kpoint()
1003                        << "\" weight=\""
1004                        << setprecision(8)
1005                        << wf.weight(ikp)
1006                        << "\" n=\"" << nst << "\">" << endl;
1007                   for ( int i = 0; i < nst; i++ )
1008                   {
1009                     ostr << setw(12) << setprecision(5)
1010                          << wf.sd(isp_loc,ikp_loc)->eig(i)*eVolt;
1011                     if ( i%5 == 4 ) ostr << endl;
1012                   }
1013                   if ( nst%5 != 0 ) ostr << endl;
1014                   ostr << "  </eigenvalues>" << endl;
1015                 }
1016               }
1017               cout0(ostr.str(),isrc);
1018               MPI_Barrier(MPIdata::comm());
1019             }
1020           }
1021           if ( MPIdata::onpe0() )
1022             cout << "</eigenset>" << endl;
1023           MPI_Barrier(MPIdata::comm());
1024         }
1025 
1026         // update occupation numbers if fractionally occupied states
1027         // compute weighted sum of eigenvalues
1028         // default value if no fractional occupation
1029         double fac = ( wf.nspin() == 1 ) ? 2.0 : 1.0 ;
1030         double w_eigenvalue_sum = fac * eigenvalue_sum;
1031 
1032         if ( fractional_occ )
1033         {
1034           if ( s_.ctrl.fermi_temp > 0 )
1035             wf.update_occ(s_.ctrl.fermi_temp);
1036 #if 0
1037           if ( onpe0 )
1038           {
1039             cout << "  Wavefunction entropy: " << wf_entropy << endl;
1040             cout << "  Entropy contribution to free energy: "
1041                  << - wf_entropy * s_.ctrl.fermi_temp * boltz << endl;
1042           }
1043 #endif
1044           w_eigenvalue_sum = 0.0;
1045           for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
1046           {
1047             for ( int ikp_loc = 0; ikp_loc < wf.nkp_loc(); ++ikp_loc )
1048             {
1049               const int nst = wf.sd(isp_loc,ikp_loc)->nst();
1050               const int ikpg = wf.ikp_global(ikp_loc);
1051               const double wkp = wf.weight(ikpg);
1052               for ( int n = 0; n < nst; n++ )
1053               {
1054                 const double occ = wf.sd(isp_loc,ikp_loc)->occ(n);
1055                 w_eigenvalue_sum += wkp * occ * wf.sd(isp_loc,ikp_loc)->eig(n);
1056               }
1057             }
1058           }
1059           double tsum;
1060           MPI_Allreduce(&w_eigenvalue_sum,&tsum,1,
1061             MPI_DOUBLE,MPI_SUM,MPIdata::kp_sp_comm());
1062           w_eigenvalue_sum = tsum;
1063         }
1064 
1065         // Harris-Foulkes estimate of the total energy
1066         etotal_int = w_eigenvalue_sum - ef_.ehart_e() + ef_.ehart_p() +
1067                      ef_.esr() - ef_.eself() + ef_.dxc() + ef_.ets();
1068 #ifdef DEBUG
1069         if ( onpe0 )
1070         {
1071           cout << setprecision(8);
1072           cout << "w_eigenvalue_sum = " << setw(15) << w_eigenvalue_sum << endl;
1073           cout << "ef.dxc()         = " << setw(15) << ef_.dxc() << endl;
1074           cout << "ef.ehart()       = " << setw(15) << ef_.ehart() << endl;
1075           cout << "ef.ehart_e()     = " << setw(15) << ef_.ehart_e() << endl;
1076           cout << "ef.ehart_ep()    = " << setw(15) << ef_.ehart_ep() << endl;
1077           cout << "ef.esr()         = " << setw(15) << ef_.esr() << endl;
1078         }
1079 #endif
1080 
1081         if ( onpe0 )
1082         {
1083           cout.setf(ios::fixed,ios::floatfield);
1084           cout.setf(ios::right,ios::adjustfield);
1085           cout << "  <etotal_int>  " << setprecision(8) << setw(15)
1086                << etotal_int << " </etotal_int>\n";
1087         }
1088 
1089         etotal_mm = etotal_m;
1090         etotal_m = etotal;
1091         etotal = etotal_int;
1092 
1093         if ( nite_ > 0 && onpe0 )
1094           cout << "  BOSampleStepper: end scf iteration" << endl;
1095 
1096         // delta_etotal = interval containing etotal, etotal_m and etotal_mm
1097         double delta_etotal = fabs(etotal - etotal_m);
1098         delta_etotal = max(delta_etotal,fabs(etotal - etotal_mm));
1099         delta_etotal = max(delta_etotal,fabs(etotal_m - etotal_mm));
1100         // bcast the value of delta_etotal to ensure coherence
1101         // of scf_converged
1102         MPI_Bcast(&delta_etotal,1,MPI_DOUBLE,0,MPIdata::comm());
1103         scf_converged |= (delta_etotal < s_.ctrl.scf_tol);
1104         itscf++;
1105       } // while scf
1106 
1107       if ( compute_mlwf || compute_mlwfc )
1108       {
1109         tmap["mlwf"].start();
1110         for ( int ispin = 0; ispin < wf.nspin(); ++ispin )
1111         {
1112           const int isp_loc = wf.isp_local(ispin);
1113           const int ikp = 0;
1114           const int ikp_loc = wf.ikp_local(ikp);
1115           if ( ( isp_loc >= 0 ) && ( ikp_loc >= 0 ) )
1116           {
1117             mlwft[isp_loc]->update();
1118             mlwft[isp_loc]->compute_transform();
1119 
1120             if ( compute_mlwf )
1121             {
1122               SlaterDet& sd = *(wf.sd(isp_loc,ikp_loc));
1123               mlwft[isp_loc]->apply_transform(sd);
1124             }
1125           }
1126         }
1127 
1128         // print mlwf centers
1129         D3vector edipole_sum, d3tsum;
1130         if ( onpe0 )
1131           cout << "<mlwfs>" << endl;
1132         for ( int ispin = 0; ispin < wf.nspin(); ++ispin )
1133         {
1134           const int isp_loc = wf.isp_local(ispin);
1135           const int ikp = 0;
1136           const int ikp_loc = wf.ikp_local(ikp);
1137           ostringstream ostr;
1138           int isrc = -1;
1139           if ( ( isp_loc >= 0 ) && ( ikp_loc >= 0 ) )
1140           {
1141             SlaterDet& sd = *(wf.sd(isp_loc,ikp_loc));
1142             if ( MPIdata::sd_rank() == 0 )
1143             {
1144               ostr.str("");
1145 	      ostr.setf(ios::fixed,ios::floatfield);
1146 	      ostr.setf(ios::right,ios::adjustfield);
1147               isrc = MPIdata::rank();
1148               ostr << " <mlwfset spin=\"" << ispin
1149                    << "\" size=\"" << sd.nst() << "\">" << endl;
1150               double total_spread[6];
1151               for ( int j = 0; j < 6; j++ )
1152                  total_spread[j] = 0.0;
1153               for ( int i = 0; i < sd.nst(); i++ )
1154               {
1155                 D3vector ctr = mlwft[isp_loc]->center(i);
1156                 double spi[6];
1157                 for (int j=0; j<3; j++)
1158                 {
1159                   spi[j] = mlwft[isp_loc]->spread2(i,j);
1160                   total_spread[j] += spi[j];
1161                 }
1162 
1163                 ostr << "   <mlwf center=\"" << setprecision(6)
1164                      << setw(12) << ctr.x
1165                      << setw(12) << ctr.y
1166                      << setw(12) << ctr.z
1167                      << " \" spread=\" "
1168                      << setw(12) << spi[0]
1169                      << setw(12) << spi[1]
1170                      << setw(12) << spi[2] << " \"/>"
1171                      << endl;
1172               }
1173 
1174               ostr << " <total_spread> ";
1175               for ( int j = 0; j < 3; j++ )
1176                 ostr << setprecision(6) << setw(15) << total_spread[j];
1177               ostr << " </total_spread>" << endl;
1178               D3vector edipole = mlwft[isp_loc]->dipole();
1179               ostr << " <electronic_dipole spin=\"" << ispin
1180                    << "\"> " << edipole << " </electronic_dipole>" << endl;
1181               edipole_sum += edipole;
1182               ostr << " </mlwfset>" << endl;
1183             } // sd_rank() == 0
1184           }
1185           cout0(ostr.str(),isrc);
1186           MPI_Barrier(MPIdata::comm());
1187         } // ispin
1188         if ( onpe0 )
1189           cout << "</mlwfs>" << endl;
1190 
1191         if ( MPIdata::sd_rank() == 0 )
1192         {
1193           MPI_Reduce(&edipole_sum[0],&d3tsum[0],3,
1194                      MPI_DOUBLE,MPI_SUM,0,MPIdata::sp_comm());
1195           edipole_sum = d3tsum;
1196         }
1197 
1198         if ( onpe0 )
1199         {
1200           D3vector idipole = atoms.dipole();
1201           cout << setprecision(6);
1202           cout << " <ionic_dipole> " << idipole
1203                << " </ionic_dipole>" << endl;
1204           cout << " <total_dipole> " << idipole + edipole_sum
1205                << " </total_dipole>" << endl;
1206           cout << " <total_dipole_length> " << length(idipole + edipole_sum)
1207                << " </total_dipole_length>" << endl;
1208         }
1209         tmap["mlwf"].stop();
1210       }
1211 
1212       // If GS calculation only, print energy and atomset at end of iterations
1213       if ( gs_only )
1214       {
1215         tmap["charge"].start();
1216         cd_.update_density();
1217         tmap["charge"].stop();
1218 
1219         tmap["update_vhxc"].start();
1220         ef_.update_vhxc(compute_stress);
1221         tmap["update_vhxc"].stop();
1222         const bool compute_forces = true;
1223         tmap["energy"].start();
1224         ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
1225         tmap["energy"].stop();
1226 
1227         if ( onpe0 )
1228         {
1229           cout << ef_;
1230           if ( ef_.el_enth() )
1231             cout << *ef_.el_enth();
1232           cout << "<atomset>" << endl;
1233           cout << atoms.cell();
1234           for ( int is = 0; is < atoms.atom_list.size(); is++ )
1235           {
1236             int i = 0;
1237             for ( int ia = 0; ia < atoms.atom_list[is].size(); ia++ )
1238             {
1239               Atom* pa = atoms.atom_list[is][ia];
1240               cout << "  <atom name=\"" << pa->name() << "\""
1241                    << " species=\"" << pa->species()
1242                    << "\">\n"
1243                    << "    <position> " << pa->position() << " </position>\n"
1244                    << "    <velocity> " << pa->velocity() << " </velocity>\n"
1245                    << "    <force> "
1246                    << fion[is][i] << " "
1247                    << fion[is][i+1] << " "
1248                    << fion[is][i+2]
1249                    << " </force>\n";
1250               cout << "  </atom>" << endl;
1251               i += 3;
1252             }
1253           }
1254           cout << "</atomset>" << endl;
1255           cout << setprecision(6);
1256           cout << "<unit_cell_a_norm> " << atoms.cell().a_norm(0)
1257                << " </unit_cell_a_norm>" << endl;
1258           cout << "<unit_cell_b_norm> " << atoms.cell().a_norm(1)
1259                << " </unit_cell_b_norm>" << endl;
1260           cout << "<unit_cell_c_norm> " << atoms.cell().a_norm(2)
1261                << " </unit_cell_c_norm>" << endl;
1262           cout << setprecision(3) << "<unit_cell_alpha>  "
1263                << atoms.cell().alpha() << " </unit_cell_alpha>" << endl;
1264           cout << setprecision(3) << "<unit_cell_beta>   "
1265                << atoms.cell().beta() << " </unit_cell_beta>" << endl;
1266           cout << setprecision(3) << "<unit_cell_gamma>  "
1267                << atoms.cell().gamma() << " </unit_cell_gamma>" << endl;
1268           cout << setprecision(3) << "<unit_cell_volume> "
1269                << atoms.cell().volume() << " </unit_cell_volume>" << endl;
1270           if ( compute_stress )
1271           {
1272             compute_sigma();
1273             print_stress();
1274           }
1275         }
1276       }
1277       wf_stepper->postprocess();
1278     }
1279     else
1280     {
1281       // wf_stepper == 0, wf_dyn == LOCKED
1282       // evaluate and print energy
1283       tmap["charge"].start();
1284       cd_.update_density();
1285       tmap["charge"].stop();
1286       tmap["update_vhxc"].start();
1287       ef_.update_vhxc(compute_stress);
1288       tmap["update_vhxc"].stop();
1289       tmap["energy"].start();
1290       ef_.energy(true,dwf,false,fion,false,sigma_eks);
1291       tmap["energy"].stop();
1292       if ( onpe0 )
1293       {
1294         cout << cd_;
1295         cout << ef_;
1296         if ( ef_.el_enth() )
1297           cout << *ef_.el_enth();
1298       }
1299     }
1300 
1301     // if using force_tol or stress_tol, check if maxforce and maxstress
1302     // within tolerance
1303     if ( force_tol > 0.0 )
1304     {
1305       if ( onpe0 )
1306         cout << "  maxforce: " << scientific
1307              << setprecision(4) << maxforce << fixed << endl;
1308       iter_done |= ( maxforce < force_tol );
1309     }
1310     if ( stress_tol > 0.0 )
1311     {
1312       if ( onpe0 )
1313         cout << "  maxstress: " << scientific
1314              << setprecision(4) << maxstress << fixed << endl;
1315       iter_done |= ( maxstress < stress_tol );
1316     }
1317 
1318     // print iteration time
1319 
1320     double time = tm_iter.real();
1321     double tmin, tmax;
1322     MPI_Reduce(&time,&tmin,1,MPI_DOUBLE,MPI_MIN,0,MPIdata::comm());
1323     MPI_Reduce(&time,&tmax,1,MPI_DOUBLE,MPI_MAX,0,MPIdata::comm());
1324     if ( onpe0 )
1325     {
1326       string s = "name=\"iteration\"";
1327       cout << "<timing " << left << setw(22) << s
1328            << " min=\"" << setprecision(3) << tmin << "\""
1329            << " max=\"" << setprecision(3) << tmax << "\"/>"
1330            << endl;
1331     }
1332 
1333     if ( onpe0 )
1334       cout << "</iteration>" << endl;
1335 
1336   } // for iter
1337 
1338   if ( atoms_move )
1339   {
1340     // compute ionic forces at last position to update velocities
1341     // consistently with last position
1342     tmap["charge"].start();
1343     cd_.update_density();
1344     tmap["charge"].stop();
1345 
1346     tmap["update_vhxc"].start();
1347     ef_.update_vhxc(compute_stress);
1348     tmap["update_vhxc"].stop();
1349     const bool compute_forces = true;
1350     tmap["energy"].start();
1351     double energy =
1352       ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
1353     tmap["energy"].stop();
1354 
1355     ionic_stepper->compute_v(energy,fion);
1356     // positions r0 and velocities v0 are consistent
1357   }
1358 
1359   if ( atoms_move && extrapolate_wf )
1360   {
1361     // compute wavefunction velocity after last iteration
1362     // s_.wfv contains the previous wavefunction
1363 
1364     tmap["align"].start();
1365     s_.wfv->align(s_.wf);
1366     tmap["align"].stop();
1367 
1368     for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
1369     {
1370       for ( int ikp_loc = 0; ikp_loc < wf.nkp_loc(); ++ikp_loc )
1371       {
1372         double* c = (double*) s_.wf.sd(isp_loc,ikp_loc)->c().cvalptr();
1373         double* cm = (double*) s_.wfv->sd(isp_loc,ikp_loc)->c().cvalptr();
1374         const int mloc = s_.wf.sd(isp_loc,ikp_loc)->c().mloc();
1375         const int nloc = s_.wf.sd(isp_loc,ikp_loc)->c().nloc();
1376         const int len = 2*mloc*nloc;
1377         const double dt_inv = 1.0 / dt;
1378         if ( ntc_extrapolation )
1379         {
1380           double* cmm = (double*) wfmm->sd(isp_loc,ikp_loc)->c().cvalptr();
1381           for ( int i = 0; i < len; i++ )
1382           {
1383             const double x = c[i];
1384             const double xmm = cmm[i];
1385             cm[i] = dt_inv * ( x - xmm );
1386           }
1387           tmap["gram"].start();
1388           s_.wf.sd(isp_loc,ikp_loc)->gram();
1389           tmap["gram"].stop();
1390         }
1391         else // normal extrapolation or asp_extrapolation
1392         {
1393           for ( int i = 0; i < len; i++ )
1394           {
1395             const double x = c[i];
1396             const double xm = cm[i];
1397             cm[i] = dt_inv * ( x - xm );
1398           }
1399           tmap["gram"].start();
1400           s_.wf.sd(isp_loc,ikp_loc)->gram();
1401           tmap["gram"].stop();
1402         }
1403       }
1404     }
1405 
1406     // compute ionic forces at last position to update velocities
1407     // consistently with last position
1408     tmap["charge"].start();
1409     cd_.update_density();
1410     tmap["charge"].stop();
1411 
1412     tmap["update_vhxc"].start();
1413     ef_.update_vhxc(compute_stress);
1414     tmap["update_vhxc"].stop();
1415     const bool compute_forces = true;
1416     tmap["energy"].start();
1417     double energy =
1418       ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
1419     tmap["energy"].stop();
1420 
1421     ionic_stepper->compute_v(energy,fion);
1422     // positions r0 and velocities v0 are consistent
1423   }
1424 
1425   for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
1426     delete mlwft[isp_loc];
1427 
1428   // delete steppers
1429   delete wf_stepper;
1430   delete ionic_stepper;
1431   delete cell_stepper;
1432 
1433   if ( ntc_extrapolation || asp_extrapolation ) delete wfmm;
1434 
1435   update_density_first_ = true;
1436 }
1437