1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2008 The Regents of the University of California
4 //
5 // This file is part of Qbox
6 //
7 // Qbox is distributed under the terms of the GNU General Public License
8 // as published by the Free Software Foundation, either version 2 of
9 // the License, or (at your option) any later version.
10 // See the file COPYING in the root directory of this distribution
11 // or <http://www.gnu.org/licenses/>.
12 //
13 ////////////////////////////////////////////////////////////////////////////////
14 //
15 // BOSampleStepper.cpp
16 //
17 ////////////////////////////////////////////////////////////////////////////////
18
19 #include "BOSampleStepper.h"
20 #include "EnergyFunctional.h"
21 #include "SlaterDet.h"
22 #include "Basis.h"
23 #include "WavefunctionStepper.h"
24 #include "SDWavefunctionStepper.h"
25 #include "PSDWavefunctionStepper.h"
26 #include "PSDAWavefunctionStepper.h"
27 #include "JDWavefunctionStepper.h"
28 #include "UserInterface.h"
29 #include "Preconditioner.h"
30 #include "SDIonicStepper.h"
31 #include "SDAIonicStepper.h"
32 #include "CGIonicStepper.h"
33 #include "ANDIonicStepper.h"
34 #include "MDIonicStepper.h"
35 #include "BMDIonicStepper.h"
36 #include "SDCellStepper.h"
37 #include "CGCellStepper.h"
38 #include "AndersonMixer.h"
39 #include "MLWFTransform.h"
40 #include "D3tensor.h"
41 #include "cout0.h"
42
43 #include <iostream>
44 #include <iomanip>
45 using namespace std;
46
47 ////////////////////////////////////////////////////////////////////////////////
BOSampleStepper(Sample & s,int nitscf,int nite)48 BOSampleStepper::BOSampleStepper(Sample& s, int nitscf, int nite) :
49 SampleStepper(s), cd_(s.wf), ef_(s,cd_),
50 dwf(s.wf), nitscf_(nitscf), nite_(nite),
51 update_density_first_(true), update_vh_(true), update_vxc_(true) {}
52
53 ////////////////////////////////////////////////////////////////////////////////
~BOSampleStepper()54 BOSampleStepper::~BOSampleStepper()
55 {
56 for ( TimerMap::iterator i = tmap.begin(); i != tmap.end(); i++ )
57 {
58 double time = i->second.real();
59 double tmin, tmax;
60 MPI_Reduce(&time,&tmin,1,MPI_DOUBLE,MPI_MIN,0,MPIdata::comm());
61 MPI_Reduce(&time,&tmax,1,MPI_DOUBLE,MPI_MAX,0,MPIdata::comm());
62 if ( MPIdata::onpe0() && (tmax > 0.0) )
63 {
64 string s = "name=\"" + i->first + "\"";
65 cout << "<timing " << left << setw(22) << s
66 << " min=\"" << setprecision(3) << tmin << "\""
67 << " max=\"" << setprecision(3) << tmax << "\"/>"
68 << endl;
69 }
70 }
71 }
72
73 ////////////////////////////////////////////////////////////////////////////////
initialize_density(void)74 void BOSampleStepper::initialize_density(void)
75 {
76 // initialize cd_ with a sum of atomic densities
77
78 double atom_radius[] =
79 {
80 0.000, 1.542, 0.931, 1.727, 1.636, // null H He Li Be
81 1.845, 1.538, 1.311, 1.141, 1.014, // B C N O F
82 0.983, 1.238, 1.347, 1.376, 2.151, // Ne Na Mg Al Si
83 1.927, 1.733, 1.563, 1.429, 1.546, // P S Cl Ar K
84 1.663, 1.604, 1.516, 1.434, 1.377, // Ca Sc Ti V Cr
85 1.310, 1.245, 1.201, 1.162, 1.098, // Mn Fe Co Ni Cu
86 1.077, 1.331, 1.415, 2.015, 1.880, // Zn Ga Ge As Se
87 1.749, 1.630, 1.705, 1.819, 1.794, // Br Kr Rb Sr Y
88 1.728, 1.664, 1.589, 1.523, 1.461, // Zr Nb Mo Tc Ru
89 1.410, 1.348, 1.306, 1.303, 1.554, // Rh Pd Ag Cd In
90 1.609, 1.611, 1.530, 1.514, 1.464, // Sn Sb Te I Xe
91 1.946, 1.967, 1.943, 1.930, 1.920, // Cs Ba La Ce Pr
92 1.910, 1.900, 1.890, 1.880, 1.870, // Nd Pm Sm Eu Gd
93 1.860, 1.850, 1.840, 1.830, 1.820, // Tb Dy Ho Er Tm
94 1.810, 1.800, 1.701, 1.658, 1.606, // Yb Lu Hf Ta W
95 1.550, 1.500, 1.446, 1.398, 1.355, // Re Os Ir Pt Au
96 1.314, 1.624, 1.659, 1.634, 1.620, // Hg Tl Pb Bi Po
97 1.600, 1.500, 1.600, 1.600, 1.600, // At Rn Fr Ra Ac
98 1.600, 1.600, 1.600, 1.600, 1.600, // Th Pa U Np Pu
99 1.600, 1.600, 1.600, 1.600, 1.600, // Am Cm Bk Cf Es
100 1.600, 1.600, 1.600, 1.600 // Fm Md No Lr
101 };
102
103 const AtomSet& atoms = s_.atoms;
104 const Basis* const vbasis = cd_.vbasis();
105 const int ngloc = vbasis->localsize();
106 vector<vector<complex<double> > > rhops;
107 const int nsp = atoms.nsp();
108 rhops.resize(nsp);
109 vector<complex<double> > rhopst(ngloc);
110 const double * const g2 = vbasis->g2_ptr();
111
112 for ( int is = 0; is < nsp; is++ )
113 {
114 rhops[is].resize(ngloc);
115 Species *s = atoms.species_list[is];
116 const int zval = s->zval();
117 const int atomic_number = s->atomic_number();
118 assert(atomic_number < sizeof(atom_radius)/sizeof(double));
119 double rc = atom_radius[atomic_number];
120 for ( int ig = 0; ig < ngloc; ig++ )
121 {
122 double arg = 0.25 * rc * rc * g2[ig];
123 rhops[is][ig] = zval * exp( -arg );
124 }
125 }
126
127 vector<vector<double> > tau0;
128 tau0.resize(nsp);
129 for ( int is = 0; is < nsp; is++ )
130 tau0.resize(3*atoms.na(is));
131 atoms.get_positions(tau0);
132 StructureFactor sf;
133 sf.init(tau0,*vbasis);
134 sf.update(tau0,*vbasis);
135
136 memset( (void*)&rhopst[0], 0, 2*ngloc*sizeof(double) );
137 for ( int is = 0; is < nsp; is++ )
138 {
139 complex<double> *s = &sf.sfac[is][0];
140 for ( int ig = 0; ig < ngloc; ig++ )
141 {
142 const complex<double> sg = s[ig];
143 rhopst[ig] += sg * rhops[is][ig];
144 }
145 }
146
147 // Adjust G=0 component of the charge if net_charge is non-zero
148 rhopst[0] += s_.wf.nel() - atoms.nel();
149
150 // Initialize charge equally for both spins
151 cd_.rhog[0] = rhopst;
152 if ( cd_.rhog.size() == 2 )
153 {
154 assert(cd_.rhog[0].size()==cd_.rhog[1].size());
155 for ( int i = 0; i < cd_.rhog[0].size(); i++ )
156 {
157 cd_.rhog[0][i] = 0.5 * rhopst[i];
158 cd_.rhog[1][i] = 0.5 * rhopst[i];
159 }
160 }
161 cd_.update_rhor();
162 update_density_first_ = false;
163 }
164
165 ////////////////////////////////////////////////////////////////////////////////
step(int niter)166 void BOSampleStepper::step(int niter)
167 {
168 const bool anderson_charge_mixing = ( s_.ctrl.charge_mix_ndim > 0 );
169
170 // determine whether eigenvectors must be computed
171 // eigenvectors are computed if explicitly requested with wf_diag==T
172 // or if the SlaterDet has fractionally occupied states
173 const bool fractional_occ = (s_.wf.nel() != 2 * s_.wf.nst());
174 const bool compute_eigvec = fractional_occ || s_.ctrl.wf_diag == "T";
175 const bool compute_mlwf = s_.ctrl.wf_diag == "MLWF";
176 const bool compute_mlwfc = s_.ctrl.wf_diag == "MLWFC";
177 enum ortho_type { GRAM, LOWDIN, ORTHO_ALIGN, RICCATI };
178
179 const double gpa = 29421.5;
180
181 AtomSet& atoms = s_.atoms;
182 Wavefunction& wf = s_.wf;
183 Wavefunction*& wfv = s_.wfv;
184 const bool onpe0 = MPIdata::onpe0();
185 const Context& sd_ctxt = wf.sd_context();
186
187 const int nspin = wf.nspin();
188
189 const double dt = s_.ctrl.dt;
190
191 const string wf_dyn = s_.ctrl.wf_dyn;
192 const string atoms_dyn = s_.ctrl.atoms_dyn;
193 const string cell_dyn = s_.ctrl.cell_dyn;
194
195 const bool extrapolate_wf = ( atoms_dyn == "MD" );
196
197 bool ntc_extrapolation = false;
198 bool asp_extrapolation = false;
199
200 const map<string,string>& debug_map = s_.ctrl.debug;
201
202 map<string,string>::const_iterator imap =
203 debug_map.find("EXTRAPOLATION");
204 if ( imap != debug_map.end() )
205 {
206 const string val = imap->second;
207 if ( val == "NTC" ) ntc_extrapolation = true;
208 if ( val == "ASP" ) asp_extrapolation = true;
209 }
210
211 Wavefunction* wfmm;
212 if ( extrapolate_wf && ( ntc_extrapolation || asp_extrapolation ) )
213 wfmm = new Wavefunction(wf);
214
215 // Next lines: special value of niter = 0: GS calculation only
216 const bool atoms_move = ( niter > 0 && atoms_dyn != "LOCKED" );
217 const bool compute_stress = ( s_.ctrl.stress == "ON" );
218 const bool cell_moves = ( niter > 0 && compute_stress &&
219 cell_dyn != "LOCKED" );
220 // GS-only calculation:
221 const bool gs_only = !atoms_move && !cell_moves;
222
223 const double force_tol = s_.ctrl.force_tol;
224 const double stress_tol = s_.ctrl.stress_tol;
225
226 Timer tm_iter;
227
228 Preconditioner prec(wf,ef_,s_.ctrl.ecutprec);
229
230 WavefunctionStepper* wf_stepper = 0;
231 if ( wf_dyn == "SD" )
232 {
233 const double emass = s_.ctrl.emass;
234 double dt2bye = (emass == 0.0) ? 0.5 / wf.ecut() : dt*dt/emass;
235
236 // divide dt2bye by facs coefficient if stress == ON
237 const double facs = 2.0;
238 if ( s_.ctrl.stress == "ON" )
239 {
240 dt2bye /= facs;
241 }
242 wf_stepper = new SDWavefunctionStepper(wf,dt2bye,tmap);
243 }
244 else if ( wf_dyn == "PSD" )
245 wf_stepper = new PSDWavefunctionStepper(wf,prec,tmap);
246 else if ( wf_dyn == "PSDA" )
247 wf_stepper = new PSDAWavefunctionStepper(wf,prec,tmap);
248 else if ( wf_dyn == "JD" )
249 wf_stepper = new JDWavefunctionStepper(wf,prec,ef_,tmap);
250
251 // wf_stepper == 0 indicates that wf_dyn == LOCKED
252
253 IonicStepper* ionic_stepper = 0;
254 if ( atoms_dyn == "SD" )
255 ionic_stepper = new SDIonicStepper(s_);
256 else if ( atoms_dyn == "SDA" )
257 ionic_stepper = new SDAIonicStepper(s_);
258 else if ( atoms_dyn == "CG" )
259 ionic_stepper = new CGIonicStepper(s_);
260 else if ( atoms_dyn == "AND" )
261 ionic_stepper = new ANDIonicStepper(s_);
262 else if ( atoms_dyn == "MD" )
263 ionic_stepper = new MDIonicStepper(s_);
264 else if ( atoms_dyn == "BMD" )
265 ionic_stepper = new BMDIonicStepper(s_);
266
267 if ( ionic_stepper )
268 ionic_stepper->setup_constraints();
269
270 CellStepper* cell_stepper = 0;
271 if ( cell_dyn == "SD" )
272 cell_stepper = new SDCellStepper(s_);
273 else if ( cell_dyn == "CG" )
274 cell_stepper = new CGCellStepper(s_);
275
276 // Allocate wavefunction velocity if not available
277 if ( atoms_move && extrapolate_wf )
278 {
279 if ( wfv == 0 )
280 {
281 wfv = new Wavefunction(wf);
282 wfv->clear();
283 }
284 }
285
286 vector<MLWFTransform*> mlwft(wf.nsp_loc());
287
288 if ( compute_mlwf || compute_mlwfc )
289 {
290 // MLWF can be computed at the gamma point only
291 // There must be a single k-point, and it must be gamma
292 if ( wf.nkp() > 1 || ( wf.nkp()==1 && wf.kpoint(0) != D3vector(0,0,0) ) )
293 {
294 if ( onpe0 )
295 {
296 cout << " BOSampleStepper::step: MLWF can be computed at k=0 only"
297 << endl;
298 cout << " BOSampleStepper::step: cannot run" << endl;
299 }
300 return;
301 }
302
303 for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
304 {
305 const int ikp = 0;
306 const int ikp_loc = wf.ikp_local(ikp);
307 if ( ikp_loc >= 0 )
308 mlwft[isp_loc] = new MLWFTransform(*wf.sd(isp_loc,ikp_loc));
309 }
310 }
311
312 // Charge mixing variables: include both spins in the same vector
313 if ( nspin > 1 ) assert(cd_.rhog[0].size()==cd_.rhog[1].size());
314 const int ng = cd_.rhog[0].size();
315 vector<complex<double> > rhog_in(nspin*ng), drhog(nspin*ng),
316 rhobar(nspin*ng), drhobar(nspin*ng);
317 const int anderson_ndim = s_.ctrl.charge_mix_ndim;
318 vector<double> wkerker(ng), wls(ng);
319
320 // Anderson charge mixer: include both spins in the same vector
321 // Factor of 2: complex coeffs stored as double
322 MPI_Comm vcomm = MPIdata::g_comm();
323 AndersonMixer mixer(2*nspin*ng,anderson_ndim,&vcomm);
324
325 // compute Kerker preconditioning
326 // real space Kerker cutoff in a.u.
327 const double rc_Kerker = s_.ctrl.charge_mix_rcut;
328 const double *const g2 = cd_.vbasis()->g2_ptr();
329
330 // define q1 cutoff for row weighting of LS charge mixing
331 // Use rc1 = 3 a.u. default cutoff
332 double rc1 = 3.0;
333 // check if override from the debug map
334 // use: set debug RC1 <value>
335 imap = debug_map.find("RC1");
336 if ( imap != debug_map.end() )
337 {
338 const string val = imap->second;
339 istringstream is(val);
340 is >> rc1;
341 if ( onpe0 )
342 cout << " override rc1 value: rc1 = " << rc1 << endl;
343 assert(rc1 >= 0.0);
344 }
345
346 double delta_ratio = 0.01;
347 imap = debug_map.find("DELTA_RATIO");
348 if ( imap != debug_map.end() )
349 {
350 const string val = imap->second;
351 istringstream is(val);
352 is >> delta_ratio;
353 if ( onpe0 )
354 cout << " override delta_ratio value = " << delta_ratio << endl;
355 assert(delta_ratio >= 0.0);
356 }
357
358 if ( rc1 != 0.0 )
359 {
360 const double q1 = 2.0 * M_PI / rc1;
361 for ( int i = 0; i < wls.size(); i++ )
362 {
363 if ( g2[i] != 0.0 )
364 wls[i] = sqrt(g2[i] / ( g2[i] + q1*q1 ));
365 else
366 wls[i] = 1.0;
367 }
368 }
369 else
370 {
371 for ( int i = 0; i < wls.size(); i++ )
372 wls[i] = 1.0;
373 }
374
375 if ( rc_Kerker > 0.0 )
376 {
377 const double q0_kerker = 2 * M_PI / rc_Kerker;
378 const double q0_kerker2 = q0_kerker * q0_kerker;
379 for ( int i = 0; i < wkerker.size(); i++ )
380 {
381 if ( g2[i] != 0.0 )
382 wkerker[i] = g2[i] / ( g2[i] + q0_kerker2 );
383 else
384 wkerker[i] = 1.0;
385 }
386 }
387 else
388 {
389 for ( int i = 0; i < wkerker.size(); i++ )
390 wkerker[i] = 1.0;
391 }
392
393 if ( onpe0 )
394 cout << "<net_charge> " << atoms.nel()-wf.nel() << " </net_charge>\n";
395
396 // Next line: special case of niter=0: compute GS only
397 bool iter_done = false;
398 for ( int iter = 0; iter < max(niter,1) && !iter_done; iter++ )
399 {
400 // ionic iteration
401
402 tm_iter.start();
403
404 if ( onpe0 )
405 cout << "<iteration count=\"" << iter+1 << "\">\n";
406
407 // compute energy and ionic forces using existing wavefunction
408 double maxforce = 0.0;
409 double maxstress = 0.0;
410
411 if ( !gs_only )
412 {
413 tmap["charge"].start();
414 cd_.update_density();
415 tmap["charge"].stop();
416
417 tmap["update_vhxc"].start();
418 ef_.update_vhxc(compute_stress);
419 tmap["update_vhxc"].stop();
420 const bool compute_forces = true;
421 tmap["energy"].start();
422 double energy =
423 ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
424 tmap["energy"].stop();
425 double enthalpy = ef_.enthalpy();
426
427 if ( force_tol > 0.0 )
428 {
429 maxforce = 0.0;
430 for ( int is = 0; is < fion.size(); is++ )
431 for ( int i = 0; i < fion[is].size(); i++ )
432 maxforce = max(maxforce, fabs(fion[is][i]));
433 }
434
435 if ( stress_tol > 0.0 )
436 {
437 compute_sigma();
438 for ( int i = 0; i < sigma.size(); i++ )
439 maxstress = max(maxstress, gpa*fabs(sigma[i]));
440 }
441
442 if ( onpe0 )
443 {
444 cout << cd_;
445 cout << ef_;
446 if ( ef_.el_enth() )
447 cout << *ef_.el_enth();
448 }
449
450 if ( iter > 0 && ionic_stepper )
451 {
452 ionic_stepper->compute_v(energy,fion);
453 }
454 // at this point, positions r0, velocities v0 and forces fion are
455 // consistent
456
457 // execute commands in iter_cmd if defined
458 if ( !iter_cmd_.empty() )
459 {
460 if ( iter % iter_cmd_period_ == 0 )
461 {
462 // copy positions and velocities from IonicStepper to AtomSet
463 if ( ionic_stepper )
464 {
465 ionic_stepper->set_positions();
466 ionic_stepper->set_velocities();
467 }
468 // command must be terminated with \n
469 istringstream cmdstream(iter_cmd_ + "\n");
470 s_.ui->processCmds(cmdstream,"[iter_cmd]",true);
471 // copy positions and velocities back from AtomSet
472 if ( ionic_stepper )
473 {
474 ionic_stepper->get_positions();
475 ionic_stepper->get_velocities();
476 }
477 }
478 }
479
480 double ekin_ion = 0.0, temp_ion = 0.0;
481 if ( ionic_stepper )
482 {
483 ekin_ion = ionic_stepper->ekin();
484 temp_ion = ionic_stepper->temp();
485 }
486
487 // print positions, velocities and forces at time t0
488 if ( onpe0 )
489 {
490 cout << "<atomset>" << endl;
491 cout << atoms.cell();
492 for ( int is = 0; is < atoms.atom_list.size(); is++ )
493 {
494 int i = 0;
495 for ( int ia = 0; ia < atoms.atom_list[is].size(); ia++ )
496 {
497 Atom* pa = atoms.atom_list[is][ia];
498 cout << " <atom name=\"" << pa->name() << "\""
499 << " species=\"" << pa->species()
500 << "\">\n"
501 << " <position> " << pa->position() << " </position>\n"
502 << " <velocity> " << pa->velocity() << " </velocity>\n"
503 << " <force> "
504 << fion[is][i] << " "
505 << fion[is][i+1] << " "
506 << fion[is][i+2]
507 << " </force>\n";
508 cout << " </atom>" << endl;
509 i += 3;
510 }
511 }
512 cout << "</atomset>" << endl;
513 cout << setprecision(6);
514 cout << "<unit_cell_a_norm> " << atoms.cell().a_norm(0)
515 << " </unit_cell_a_norm>" << endl;
516 cout << "<unit_cell_b_norm> " << atoms.cell().a_norm(1)
517 << " </unit_cell_b_norm>" << endl;
518 cout << "<unit_cell_c_norm> " << atoms.cell().a_norm(2)
519 << " </unit_cell_c_norm>" << endl;
520 cout << setprecision(3) << "<unit_cell_alpha> "
521 << atoms.cell().alpha() << " </unit_cell_alpha>" << endl;
522 cout << setprecision(3) << "<unit_cell_beta> "
523 << atoms.cell().beta() << " </unit_cell_beta>" << endl;
524 cout << setprecision(3) << "<unit_cell_gamma> "
525 << atoms.cell().gamma() << " </unit_cell_gamma>" << endl;
526 cout << setprecision(3) << "<unit_cell_volume> "
527 << atoms.cell().volume() << " </unit_cell_volume>" << endl;
528
529 // include the kinetic energy of the stepper
530 // e.g. to include thermostat contributions
531 double ekin_stepper;
532 if ( ionic_stepper != 0 )
533 ekin_stepper = ionic_stepper->ekin_stepper();
534 cout << setprecision(8);
535 cout << " <econst> " << enthalpy+ekin_ion+ekin_stepper
536 << " </econst>\n";
537 cout << " <ekin_ion> " << ekin_ion << " </ekin_ion>\n";
538 cout << " <temp_ion> " << temp_ion << " </temp_ion>\n";
539 }
540
541 if ( atoms_move )
542 {
543 if ( s_.constraints.size() > 0 )
544 {
545 s_.constraints.update_constraints(dt);
546 s_.constraints.compute_forces(ionic_stepper->r0(), fion);
547 if ( onpe0 )
548 {
549 s_.constraints.list_constraints(cout);
550 }
551 }
552 // move atoms to new position: r0 <- r0 + v0*dt + dt2/m * fion
553 ionic_stepper->compute_r(energy,fion);
554 ef_.atoms_moved();
555 }
556
557 if ( compute_stress )
558 {
559 compute_sigma();
560 print_stress();
561
562 if ( cell_moves )
563 {
564 cell_stepper->compute_new_cell(enthalpy,sigma,fion);
565
566 // Update cell
567 cell_stepper->update_cell();
568
569 ef_.cell_moved();
570 ef_.atoms_moved(); // modifications of the cell also move ions
571 }
572 }
573 } // if !gs_only
574
575 // Recalculate ground state wavefunctions
576 // wavefunction extrapolation
577 if ( atoms_move && extrapolate_wf )
578 {
579 for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
580 {
581 for ( int ikp_loc = 0; ikp_loc < wf.nkp_loc(); ++ikp_loc )
582 {
583 if ( ntc_extrapolation )
584 {
585 SlaterDet* sd = wf.sd(isp_loc,ikp_loc);
586 SlaterDet* sdv = wfv->sd(isp_loc,ikp_loc);
587 SlaterDet* sdmm = wfmm->sd(isp_loc,ikp_loc);
588 double* c = (double*) sd->c().cvalptr();
589 double* cv = (double*) sdv->c().cvalptr();
590 double* cmm = (double*) sdmm->c().cvalptr();
591 const int mloc = sd->c().mloc();
592 const int nloc = sd->c().nloc();
593 const int len = 2*mloc*nloc;
594 if ( iter == 0 )
595 {
596 // copy c on cv
597 for ( int i = 0; i < len; i++ )
598 {
599 const double x = c[i];
600 const double v = cv[i];
601 // extrapolation using velocity in cv
602 c[i] = x + dt * v;
603 cv[i] = x;
604 }
605 tmap["gram"].start();
606 sd->gram();
607 tmap["gram"].stop();
608 }
609 else if ( iter == 1 )
610 {
611 sdv->align(*sd);
612 for ( int i = 0; i < len; i++ )
613 {
614 const double x = c[i];
615 const double xm = cv[i];
616 c[i] = 2.0 * x - xm;
617 cv[i] = x;
618 cmm[i] = xm;
619 }
620 tmap["gram"].start();
621 sd->gram();
622 tmap["gram"].stop();
623 }
624 else
625 {
626 // align wf with wfmm before extrapolation
627 sd->align(*sdmm);
628
629 // extrapolate
630 for ( int i = 0; i < len; i++ )
631 {
632 const double x = c[i]; // current wf (scf converged) at t
633 const double xm = cv[i]; // extrapolated wf at t
634 const double xmm = cmm[i]; // extrapolated wf at t-dt
635 c[i] = 2.0 * x - xmm;
636 // save extrapolated value at t in cmm
637 cmm[i] = xm;
638 }
639 // orthogonalize the extrapolated value
640 tmap["gram"].start();
641 sd->gram();
642 tmap["gram"].stop();
643
644 // c[i] now contains the extrapolated value
645 // save a copy in cv[i]
646 for ( int i = 0; i < len; i++ )
647 {
648 cv[i] = c[i];
649 }
650 }
651 // c[i] is now ready for electronic iterations
652 }
653 else if ( asp_extrapolation )
654 {
655 SlaterDet* sd = wf.sd(isp_loc,ikp_loc);
656 SlaterDet* sdv = wfv->sd(isp_loc,ikp_loc);
657 SlaterDet* sdmm = wfmm->sd(isp_loc,ikp_loc);
658 double* c = (double*) sd->c().cvalptr();
659 double* cv = (double*) sdv->c().cvalptr();
660 double* cmm = (double*) sdmm->c().cvalptr();
661 const int mloc = s_.wf.sd(isp_loc,ikp_loc)->c().mloc();
662 const int nloc = s_.wf.sd(isp_loc,ikp_loc)->c().nloc();
663 const int len = 2*mloc*nloc;
664 if ( iter == 0 )
665 {
666 for ( int i = 0; i < len; i++ )
667 {
668 const double x = c[i];
669 const double v = cv[i];
670 // extrapolation using velocity in cv
671 c[i] = x + dt * v;
672 cv[i] = x;
673 }
674 tmap["gram"].start();
675 sd->gram();
676 tmap["gram"].stop();
677 }
678 else if ( iter == 1 )
679 {
680 sdv->align(*sd);
681 for ( int i = 0; i < len; i++ )
682 {
683 const double x = c[i];
684 const double xm = cv[i];
685 c[i] = 2.0 * x - xm;
686 cv[i] = x;
687 cmm[i] = xm;
688 }
689 tmap["gram"].start();
690 sd->gram();
691 tmap["gram"].stop();
692 }
693 else
694 {
695 // align wf with wfmm before extrapolation
696 sd->align(*sdmm);
697
698 // extrapolate
699 for ( int i = 0; i < len; i++ )
700 {
701 const double x = c[i]; // current wf (scf converged) at t
702 const double xm = cv[i]; // extrapolated wf at t
703 const double xmm = cmm[i]; // extrapolated wf at t-dt
704 const double asp_a1 = 0.5;
705 c[i] = 2.0 * x - xm +
706 asp_a1 * ( x - 2.0 * xm + xmm );
707 //c[i] = 2.5 * x - 2.0 * xm + 0.5 * xmm;
708 cmm[i] = xm;
709 cv[i] = x;
710 }
711 // orthogonalize the extrapolated value
712 tmap["gram"].start();
713 sd->gram();
714 tmap["gram"].stop();
715
716 // c[i] now contains the extrapolated value
717 }
718 // c[i] is now ready for electronic iterations
719 }
720 else // normal extrapolation
721 {
722 SlaterDet* sd = wf.sd(isp_loc,ikp_loc);
723 SlaterDet* sdv = wfv->sd(isp_loc,ikp_loc);
724 double* c = (double*) sd->c().cvalptr();
725 double* cv = (double*) sdv->c().cvalptr();
726 const int mloc = s_.wf.sd(isp_loc,ikp_loc)->c().mloc();
727 const int nloc = s_.wf.sd(isp_loc,ikp_loc)->c().nloc();
728 const int len = 2*mloc*nloc;
729 if ( iter == 0 )
730 {
731 // copy c to cv
732 for ( int i = 0; i < len; i++ )
733 {
734 const double x = c[i];
735 const double v = cv[i];
736 c[i] = x + dt * v;
737 cv[i] = x;
738 }
739 tmap["gram"].start();
740 sd->gram();
741 tmap["gram"].stop();
742 }
743 else
744 {
745 tmap["align"].start();
746 sdv->align(*sd);
747 tmap["align"].stop();
748
749 // linear extrapolation
750 for ( int i = 0; i < len; i++ )
751 {
752 const double x = c[i];
753 const double xm = cv[i];
754 c[i] = 2.0 * x - xm;
755 cv[i] = x;
756 }
757 tmap["lowdin"].start();
758 sd->lowdin();
759 tmap["lowdin"].stop();
760 }
761 }
762 }
763 }
764 } // atoms_move && extrapolate_wf
765
766 // do nitscf self-consistent iterations, each with nite electronic steps
767 if ( wf_stepper != 0 )
768 {
769 wf_stepper->preprocess();
770 if ( anderson_charge_mixing )
771 mixer.restart();
772
773 double ehart, ehart_m;
774 double delta_ehart;
775 bool scf_converged = false;
776 int itscf = 0;
777 double etotal = 0.0, etotal_m = 0.0, etotal_mm = 0.0;
778
779 while ( !scf_converged && itscf < nitscf_ )
780 {
781 if ( nite_ > 0 && onpe0 )
782 cout << " BOSampleStepper: start scf iteration" << endl;
783
784 // update charge density
785 tmap["charge"].start();
786 // The density is updated at the first step if update_density_first_
787 // is true.
788 // It is always updated after the first step
789 if ( ( update_density_first_ || itscf>0 ) )
790 cd_.update_density();
791 tmap["charge"].stop();
792
793 if ( onpe0 )
794 cout << cd_;
795
796 // charge mixing
797 if ( nite_ > 0 )
798 {
799 if ( itscf == 0 )
800 {
801 // at first scf iteration, nothing to mix
802 // memorize rhog in rhog_in for next iteration
803
804 for ( int ispin = 0; ispin < nspin; ispin++ )
805 for ( int i = 0; i < ng; i++ )
806 rhog_in[i+ng*ispin] = cd_.rhog[ispin][i];
807 }
808 else
809 {
810 // itscf > 0
811 // compute unscreened correction drhog
812 for ( int ispin = 0; ispin < nspin; ispin++ )
813 {
814 for ( int i = 0; i < ng; i++ )
815 {
816 drhog[i+ng*ispin] = (cd_.rhog[ispin][i]-rhog_in[i+ng*ispin]);
817 }
818 }
819
820 const double alpha = s_.ctrl.charge_mix_coeff;
821 // Anderson acceleration
822 if ( anderson_charge_mixing )
823 {
824 // row weighting of LS calculation
825 for ( int ispin = 0; ispin < nspin; ispin++ )
826 {
827 for ( int i = 0; i < ng; i++ )
828 drhog[i+ng*ispin] /= wls[i];
829 }
830
831 if ( sd_ctxt.mycol() == 0 )
832 {
833 // use AndersonMixer on first column only and bcast results
834 mixer.update((double*)&rhog_in[0], (double*)&drhog[0],
835 (double*)&rhobar[0], (double*)&drhobar[0]);
836 const int n = 2*nspin*ng;
837 sd_ctxt.dbcast_send('r',n,1,(double*)&rhobar[0],n);
838 sd_ctxt.dbcast_send('r',n,1,(double*)&drhobar[0],n);
839 }
840 else
841 {
842 const int n = 2*nspin*ng;
843 sd_ctxt.dbcast_recv('r',n,1,(double*)&rhobar[0],n,-1,0);
844 sd_ctxt.dbcast_recv('r',n,1,(double*)&drhobar[0],n,-1,0);
845 }
846
847 for ( int ispin = 0; ispin < nspin; ispin++ )
848 {
849 for ( int i = 0; i < ng; i++ )
850 drhobar[i+ng*ispin] *= wls[i];
851 for ( int i = 0; i < ng; i++ )
852 rhog_in[i+ng*ispin] = rhobar[i+ng*ispin] +
853 alpha * drhobar[i+ng*ispin] * wkerker[i];
854 }
855 }
856 else
857 {
858 for ( int ispin = 0; ispin < nspin; ispin++ )
859 {
860 for ( int i = 0; i < ng; i++ )
861 rhog_in[i+ng*ispin] += alpha * drhog[i+ng*ispin] * wkerker[i];
862 }
863 }
864
865 for ( int ispin = 0; ispin < nspin; ispin++ )
866 {
867 for ( int i = 0; i < ng; i++ )
868 cd_.rhog[ispin][i] = rhog_in[i+ng*ispin];
869 }
870 cd_.update_rhor();
871 }
872 } // if nite_ > 0
873
874 // update vhxc:
875 // at first scf step:
876 // - update both vh and vxc
877 // at later steps:
878 // - update depending of values of update_vh_ and update_vxc_
879 tmap["update_vhxc"].start();
880 if ( itscf == 0 )
881 ef_.update_vhxc(compute_stress);
882 else
883 ef_.update_vhxc(compute_stress, update_vh_, update_vxc_);
884 tmap["update_vhxc"].stop();
885
886 // reset stepper only if multiple non-selfconsistent steps
887 if ( nite_ > 0 ) wf_stepper->preprocess();
888
889 // non-self-consistent loop
890 // repeat until the change in eigenvalue_sum is smaller than a
891 // fraction delta_ratio of the change in Hartree energy delta_ehart
892 // in the last scf iteration
893 bool nonscf_converged = false;
894 if ( itscf == 0 )
895 {
896 ehart = ef_.ehart();
897 delta_ehart = 0.0;
898 }
899 else if ( itscf == 1 )
900 {
901 ehart_m = ehart;
902 ehart = ef_.ehart();
903 delta_ehart = fabs(ehart - ehart_m);
904 }
905 else
906 {
907 // itscf > 1
908 // only allow decrease in delta_ehart
909 ehart_m = ehart;
910 ehart = ef_.ehart();
911 delta_ehart = min(delta_ehart,fabs(ehart - ehart_m));
912 }
913 #if DEBUG
914 if ( onpe0 )
915 {
916 cout << " BOSampleStepper::step: delta_ehart: "
917 << delta_ehart << endl;
918 }
919 #endif
920
921 int ite = 0;
922 double etotal_int;
923
924 double eigenvalue_sum, eigenvalue_sum_m = 0.0;
925 // if nite == 0: do 1 iteration, no screening in charge mixing
926 // if nite > 0: do nite iterations, use screening in charge mixing
927 //
928 while ( !nonscf_converged && ite < max(nite_,1) )
929 {
930 tmap["energy"].start();
931 ef_.energy(true,dwf,false,fion,false,sigma_eks);
932 tmap["energy"].stop();
933
934 // compute the sum of eigenvalues (with fixed weight)
935 // to measure convergence of the subspace update
936 // compute trace of the Hamiltonian matrix Y^T H Y
937 // scalar product of Y and (HY): tr Y^T (HY) = sum_ij Y_ij (HY)_ij
938 // Note: since the hamiltonian is hermitian and dwf=H*wf
939 // the dot product in the following line is real
940
941 if ( ite > 0 )
942 eigenvalue_sum_m = eigenvalue_sum;
943
944 eigenvalue_sum = real(s_.wf.dot(dwf));
945 if ( onpe0 )
946 cout << " <eigenvalue_sum> " << setprecision(8)
947 << eigenvalue_sum << " </eigenvalue_sum>" << endl;
948
949 tmap["wf_update"].start();
950 wf_stepper->update(dwf);
951 tmap["wf_update"].stop();
952
953 // compare delta_eig_sum only after first iteration
954 if ( ite > 0 )
955 {
956 double delta_eig_sum = fabs(eigenvalue_sum - eigenvalue_sum_m);
957 nonscf_converged |= (delta_eig_sum < delta_ratio * delta_ehart);
958 #if DEBUG
959 if ( onpe0 )
960 {
961 cout << " BOSampleStepper::step delta_eig_sum: "
962 << delta_eig_sum << endl;
963 }
964 #endif
965 }
966 ite++;
967 }
968
969 // subspace diagonalization
970 if ( compute_eigvec || s_.ctrl.wf_diag == "EIGVAL" )
971 {
972 tmap["energy"].start();
973 ef_.energy(true,dwf,false,fion,false,sigma_eks);
974 tmap["energy"].stop();
975 tmap["wfdiag"].start();
976 s_.wf.diag(dwf,compute_eigvec);
977 tmap["wfdiag"].stop();
978 // print eigenvalues
979 if ( MPIdata::onpe0() )
980 cout << "<eigenset>" << endl;
981 for ( int ispin = 0; ispin < wf.nspin(); ++ispin )
982 {
983 const int isp_loc = wf.isp_local(ispin);
984 for ( int ikp = 0; ikp < wf.nkp(); ++ikp )
985 {
986 const int ikp_loc = wf.ikp_local(ikp);
987 ostringstream ostr;
988 ostr.setf(ios::fixed,ios::floatfield);
989 ostr.setf(ios::right,ios::adjustfield);
990 int isrc = -1;
991 if ( ( isp_loc >= 0 ) && ( ikp_loc >= 0 ) )
992 {
993 if ( MPIdata::sd_rank() == 0 )
994 {
995 ostr.str("");
996 isrc = MPIdata::rank();
997 const int nst = wf.sd(isp_loc,ikp_loc)->nst();
998 const double eVolt = 2.0 * 13.6058;
999 ostr << " <eigenvalues spin=\"" << ispin
1000 << "\" kpoint=\""
1001 << setprecision(8)
1002 << wf.sd(isp_loc,ikp_loc)->kpoint()
1003 << "\" weight=\""
1004 << setprecision(8)
1005 << wf.weight(ikp)
1006 << "\" n=\"" << nst << "\">" << endl;
1007 for ( int i = 0; i < nst; i++ )
1008 {
1009 ostr << setw(12) << setprecision(5)
1010 << wf.sd(isp_loc,ikp_loc)->eig(i)*eVolt;
1011 if ( i%5 == 4 ) ostr << endl;
1012 }
1013 if ( nst%5 != 0 ) ostr << endl;
1014 ostr << " </eigenvalues>" << endl;
1015 }
1016 }
1017 cout0(ostr.str(),isrc);
1018 MPI_Barrier(MPIdata::comm());
1019 }
1020 }
1021 if ( MPIdata::onpe0() )
1022 cout << "</eigenset>" << endl;
1023 MPI_Barrier(MPIdata::comm());
1024 }
1025
1026 // update occupation numbers if fractionally occupied states
1027 // compute weighted sum of eigenvalues
1028 // default value if no fractional occupation
1029 double fac = ( wf.nspin() == 1 ) ? 2.0 : 1.0 ;
1030 double w_eigenvalue_sum = fac * eigenvalue_sum;
1031
1032 if ( fractional_occ )
1033 {
1034 if ( s_.ctrl.fermi_temp > 0 )
1035 wf.update_occ(s_.ctrl.fermi_temp);
1036 #if 0
1037 if ( onpe0 )
1038 {
1039 cout << " Wavefunction entropy: " << wf_entropy << endl;
1040 cout << " Entropy contribution to free energy: "
1041 << - wf_entropy * s_.ctrl.fermi_temp * boltz << endl;
1042 }
1043 #endif
1044 w_eigenvalue_sum = 0.0;
1045 for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
1046 {
1047 for ( int ikp_loc = 0; ikp_loc < wf.nkp_loc(); ++ikp_loc )
1048 {
1049 const int nst = wf.sd(isp_loc,ikp_loc)->nst();
1050 const int ikpg = wf.ikp_global(ikp_loc);
1051 const double wkp = wf.weight(ikpg);
1052 for ( int n = 0; n < nst; n++ )
1053 {
1054 const double occ = wf.sd(isp_loc,ikp_loc)->occ(n);
1055 w_eigenvalue_sum += wkp * occ * wf.sd(isp_loc,ikp_loc)->eig(n);
1056 }
1057 }
1058 }
1059 double tsum;
1060 MPI_Allreduce(&w_eigenvalue_sum,&tsum,1,
1061 MPI_DOUBLE,MPI_SUM,MPIdata::kp_sp_comm());
1062 w_eigenvalue_sum = tsum;
1063 }
1064
1065 // Harris-Foulkes estimate of the total energy
1066 etotal_int = w_eigenvalue_sum - ef_.ehart_e() + ef_.ehart_p() +
1067 ef_.esr() - ef_.eself() + ef_.dxc() + ef_.ets();
1068 #ifdef DEBUG
1069 if ( onpe0 )
1070 {
1071 cout << setprecision(8);
1072 cout << "w_eigenvalue_sum = " << setw(15) << w_eigenvalue_sum << endl;
1073 cout << "ef.dxc() = " << setw(15) << ef_.dxc() << endl;
1074 cout << "ef.ehart() = " << setw(15) << ef_.ehart() << endl;
1075 cout << "ef.ehart_e() = " << setw(15) << ef_.ehart_e() << endl;
1076 cout << "ef.ehart_ep() = " << setw(15) << ef_.ehart_ep() << endl;
1077 cout << "ef.esr() = " << setw(15) << ef_.esr() << endl;
1078 }
1079 #endif
1080
1081 if ( onpe0 )
1082 {
1083 cout.setf(ios::fixed,ios::floatfield);
1084 cout.setf(ios::right,ios::adjustfield);
1085 cout << " <etotal_int> " << setprecision(8) << setw(15)
1086 << etotal_int << " </etotal_int>\n";
1087 }
1088
1089 etotal_mm = etotal_m;
1090 etotal_m = etotal;
1091 etotal = etotal_int;
1092
1093 if ( nite_ > 0 && onpe0 )
1094 cout << " BOSampleStepper: end scf iteration" << endl;
1095
1096 // delta_etotal = interval containing etotal, etotal_m and etotal_mm
1097 double delta_etotal = fabs(etotal - etotal_m);
1098 delta_etotal = max(delta_etotal,fabs(etotal - etotal_mm));
1099 delta_etotal = max(delta_etotal,fabs(etotal_m - etotal_mm));
1100 // bcast the value of delta_etotal to ensure coherence
1101 // of scf_converged
1102 MPI_Bcast(&delta_etotal,1,MPI_DOUBLE,0,MPIdata::comm());
1103 scf_converged |= (delta_etotal < s_.ctrl.scf_tol);
1104 itscf++;
1105 } // while scf
1106
1107 if ( compute_mlwf || compute_mlwfc )
1108 {
1109 tmap["mlwf"].start();
1110 for ( int ispin = 0; ispin < wf.nspin(); ++ispin )
1111 {
1112 const int isp_loc = wf.isp_local(ispin);
1113 const int ikp = 0;
1114 const int ikp_loc = wf.ikp_local(ikp);
1115 if ( ( isp_loc >= 0 ) && ( ikp_loc >= 0 ) )
1116 {
1117 mlwft[isp_loc]->update();
1118 mlwft[isp_loc]->compute_transform();
1119
1120 if ( compute_mlwf )
1121 {
1122 SlaterDet& sd = *(wf.sd(isp_loc,ikp_loc));
1123 mlwft[isp_loc]->apply_transform(sd);
1124 }
1125 }
1126 }
1127
1128 // print mlwf centers
1129 D3vector edipole_sum, d3tsum;
1130 if ( onpe0 )
1131 cout << "<mlwfs>" << endl;
1132 for ( int ispin = 0; ispin < wf.nspin(); ++ispin )
1133 {
1134 const int isp_loc = wf.isp_local(ispin);
1135 const int ikp = 0;
1136 const int ikp_loc = wf.ikp_local(ikp);
1137 ostringstream ostr;
1138 int isrc = -1;
1139 if ( ( isp_loc >= 0 ) && ( ikp_loc >= 0 ) )
1140 {
1141 SlaterDet& sd = *(wf.sd(isp_loc,ikp_loc));
1142 if ( MPIdata::sd_rank() == 0 )
1143 {
1144 ostr.str("");
1145 ostr.setf(ios::fixed,ios::floatfield);
1146 ostr.setf(ios::right,ios::adjustfield);
1147 isrc = MPIdata::rank();
1148 ostr << " <mlwfset spin=\"" << ispin
1149 << "\" size=\"" << sd.nst() << "\">" << endl;
1150 double total_spread[6];
1151 for ( int j = 0; j < 6; j++ )
1152 total_spread[j] = 0.0;
1153 for ( int i = 0; i < sd.nst(); i++ )
1154 {
1155 D3vector ctr = mlwft[isp_loc]->center(i);
1156 double spi[6];
1157 for (int j=0; j<3; j++)
1158 {
1159 spi[j] = mlwft[isp_loc]->spread2(i,j);
1160 total_spread[j] += spi[j];
1161 }
1162
1163 ostr << " <mlwf center=\"" << setprecision(6)
1164 << setw(12) << ctr.x
1165 << setw(12) << ctr.y
1166 << setw(12) << ctr.z
1167 << " \" spread=\" "
1168 << setw(12) << spi[0]
1169 << setw(12) << spi[1]
1170 << setw(12) << spi[2] << " \"/>"
1171 << endl;
1172 }
1173
1174 ostr << " <total_spread> ";
1175 for ( int j = 0; j < 3; j++ )
1176 ostr << setprecision(6) << setw(15) << total_spread[j];
1177 ostr << " </total_spread>" << endl;
1178 D3vector edipole = mlwft[isp_loc]->dipole();
1179 ostr << " <electronic_dipole spin=\"" << ispin
1180 << "\"> " << edipole << " </electronic_dipole>" << endl;
1181 edipole_sum += edipole;
1182 ostr << " </mlwfset>" << endl;
1183 } // sd_rank() == 0
1184 }
1185 cout0(ostr.str(),isrc);
1186 MPI_Barrier(MPIdata::comm());
1187 } // ispin
1188 if ( onpe0 )
1189 cout << "</mlwfs>" << endl;
1190
1191 if ( MPIdata::sd_rank() == 0 )
1192 {
1193 MPI_Reduce(&edipole_sum[0],&d3tsum[0],3,
1194 MPI_DOUBLE,MPI_SUM,0,MPIdata::sp_comm());
1195 edipole_sum = d3tsum;
1196 }
1197
1198 if ( onpe0 )
1199 {
1200 D3vector idipole = atoms.dipole();
1201 cout << setprecision(6);
1202 cout << " <ionic_dipole> " << idipole
1203 << " </ionic_dipole>" << endl;
1204 cout << " <total_dipole> " << idipole + edipole_sum
1205 << " </total_dipole>" << endl;
1206 cout << " <total_dipole_length> " << length(idipole + edipole_sum)
1207 << " </total_dipole_length>" << endl;
1208 }
1209 tmap["mlwf"].stop();
1210 }
1211
1212 // If GS calculation only, print energy and atomset at end of iterations
1213 if ( gs_only )
1214 {
1215 tmap["charge"].start();
1216 cd_.update_density();
1217 tmap["charge"].stop();
1218
1219 tmap["update_vhxc"].start();
1220 ef_.update_vhxc(compute_stress);
1221 tmap["update_vhxc"].stop();
1222 const bool compute_forces = true;
1223 tmap["energy"].start();
1224 ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
1225 tmap["energy"].stop();
1226
1227 if ( onpe0 )
1228 {
1229 cout << ef_;
1230 if ( ef_.el_enth() )
1231 cout << *ef_.el_enth();
1232 cout << "<atomset>" << endl;
1233 cout << atoms.cell();
1234 for ( int is = 0; is < atoms.atom_list.size(); is++ )
1235 {
1236 int i = 0;
1237 for ( int ia = 0; ia < atoms.atom_list[is].size(); ia++ )
1238 {
1239 Atom* pa = atoms.atom_list[is][ia];
1240 cout << " <atom name=\"" << pa->name() << "\""
1241 << " species=\"" << pa->species()
1242 << "\">\n"
1243 << " <position> " << pa->position() << " </position>\n"
1244 << " <velocity> " << pa->velocity() << " </velocity>\n"
1245 << " <force> "
1246 << fion[is][i] << " "
1247 << fion[is][i+1] << " "
1248 << fion[is][i+2]
1249 << " </force>\n";
1250 cout << " </atom>" << endl;
1251 i += 3;
1252 }
1253 }
1254 cout << "</atomset>" << endl;
1255 cout << setprecision(6);
1256 cout << "<unit_cell_a_norm> " << atoms.cell().a_norm(0)
1257 << " </unit_cell_a_norm>" << endl;
1258 cout << "<unit_cell_b_norm> " << atoms.cell().a_norm(1)
1259 << " </unit_cell_b_norm>" << endl;
1260 cout << "<unit_cell_c_norm> " << atoms.cell().a_norm(2)
1261 << " </unit_cell_c_norm>" << endl;
1262 cout << setprecision(3) << "<unit_cell_alpha> "
1263 << atoms.cell().alpha() << " </unit_cell_alpha>" << endl;
1264 cout << setprecision(3) << "<unit_cell_beta> "
1265 << atoms.cell().beta() << " </unit_cell_beta>" << endl;
1266 cout << setprecision(3) << "<unit_cell_gamma> "
1267 << atoms.cell().gamma() << " </unit_cell_gamma>" << endl;
1268 cout << setprecision(3) << "<unit_cell_volume> "
1269 << atoms.cell().volume() << " </unit_cell_volume>" << endl;
1270 if ( compute_stress )
1271 {
1272 compute_sigma();
1273 print_stress();
1274 }
1275 }
1276 }
1277 wf_stepper->postprocess();
1278 }
1279 else
1280 {
1281 // wf_stepper == 0, wf_dyn == LOCKED
1282 // evaluate and print energy
1283 tmap["charge"].start();
1284 cd_.update_density();
1285 tmap["charge"].stop();
1286 tmap["update_vhxc"].start();
1287 ef_.update_vhxc(compute_stress);
1288 tmap["update_vhxc"].stop();
1289 tmap["energy"].start();
1290 ef_.energy(true,dwf,false,fion,false,sigma_eks);
1291 tmap["energy"].stop();
1292 if ( onpe0 )
1293 {
1294 cout << cd_;
1295 cout << ef_;
1296 if ( ef_.el_enth() )
1297 cout << *ef_.el_enth();
1298 }
1299 }
1300
1301 // if using force_tol or stress_tol, check if maxforce and maxstress
1302 // within tolerance
1303 if ( force_tol > 0.0 )
1304 {
1305 if ( onpe0 )
1306 cout << " maxforce: " << scientific
1307 << setprecision(4) << maxforce << fixed << endl;
1308 iter_done |= ( maxforce < force_tol );
1309 }
1310 if ( stress_tol > 0.0 )
1311 {
1312 if ( onpe0 )
1313 cout << " maxstress: " << scientific
1314 << setprecision(4) << maxstress << fixed << endl;
1315 iter_done |= ( maxstress < stress_tol );
1316 }
1317
1318 // print iteration time
1319
1320 double time = tm_iter.real();
1321 double tmin, tmax;
1322 MPI_Reduce(&time,&tmin,1,MPI_DOUBLE,MPI_MIN,0,MPIdata::comm());
1323 MPI_Reduce(&time,&tmax,1,MPI_DOUBLE,MPI_MAX,0,MPIdata::comm());
1324 if ( onpe0 )
1325 {
1326 string s = "name=\"iteration\"";
1327 cout << "<timing " << left << setw(22) << s
1328 << " min=\"" << setprecision(3) << tmin << "\""
1329 << " max=\"" << setprecision(3) << tmax << "\"/>"
1330 << endl;
1331 }
1332
1333 if ( onpe0 )
1334 cout << "</iteration>" << endl;
1335
1336 } // for iter
1337
1338 if ( atoms_move )
1339 {
1340 // compute ionic forces at last position to update velocities
1341 // consistently with last position
1342 tmap["charge"].start();
1343 cd_.update_density();
1344 tmap["charge"].stop();
1345
1346 tmap["update_vhxc"].start();
1347 ef_.update_vhxc(compute_stress);
1348 tmap["update_vhxc"].stop();
1349 const bool compute_forces = true;
1350 tmap["energy"].start();
1351 double energy =
1352 ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
1353 tmap["energy"].stop();
1354
1355 ionic_stepper->compute_v(energy,fion);
1356 // positions r0 and velocities v0 are consistent
1357 }
1358
1359 if ( atoms_move && extrapolate_wf )
1360 {
1361 // compute wavefunction velocity after last iteration
1362 // s_.wfv contains the previous wavefunction
1363
1364 tmap["align"].start();
1365 s_.wfv->align(s_.wf);
1366 tmap["align"].stop();
1367
1368 for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
1369 {
1370 for ( int ikp_loc = 0; ikp_loc < wf.nkp_loc(); ++ikp_loc )
1371 {
1372 double* c = (double*) s_.wf.sd(isp_loc,ikp_loc)->c().cvalptr();
1373 double* cm = (double*) s_.wfv->sd(isp_loc,ikp_loc)->c().cvalptr();
1374 const int mloc = s_.wf.sd(isp_loc,ikp_loc)->c().mloc();
1375 const int nloc = s_.wf.sd(isp_loc,ikp_loc)->c().nloc();
1376 const int len = 2*mloc*nloc;
1377 const double dt_inv = 1.0 / dt;
1378 if ( ntc_extrapolation )
1379 {
1380 double* cmm = (double*) wfmm->sd(isp_loc,ikp_loc)->c().cvalptr();
1381 for ( int i = 0; i < len; i++ )
1382 {
1383 const double x = c[i];
1384 const double xmm = cmm[i];
1385 cm[i] = dt_inv * ( x - xmm );
1386 }
1387 tmap["gram"].start();
1388 s_.wf.sd(isp_loc,ikp_loc)->gram();
1389 tmap["gram"].stop();
1390 }
1391 else // normal extrapolation or asp_extrapolation
1392 {
1393 for ( int i = 0; i < len; i++ )
1394 {
1395 const double x = c[i];
1396 const double xm = cm[i];
1397 cm[i] = dt_inv * ( x - xm );
1398 }
1399 tmap["gram"].start();
1400 s_.wf.sd(isp_loc,ikp_loc)->gram();
1401 tmap["gram"].stop();
1402 }
1403 }
1404 }
1405
1406 // compute ionic forces at last position to update velocities
1407 // consistently with last position
1408 tmap["charge"].start();
1409 cd_.update_density();
1410 tmap["charge"].stop();
1411
1412 tmap["update_vhxc"].start();
1413 ef_.update_vhxc(compute_stress);
1414 tmap["update_vhxc"].stop();
1415 const bool compute_forces = true;
1416 tmap["energy"].start();
1417 double energy =
1418 ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks);
1419 tmap["energy"].stop();
1420
1421 ionic_stepper->compute_v(energy,fion);
1422 // positions r0 and velocities v0 are consistent
1423 }
1424
1425 for ( int isp_loc = 0; isp_loc < wf.nsp_loc(); ++isp_loc )
1426 delete mlwft[isp_loc];
1427
1428 // delete steppers
1429 delete wf_stepper;
1430 delete ionic_stepper;
1431 delete cell_stepper;
1432
1433 if ( ntc_extrapolation || asp_extrapolation ) delete wfmm;
1434
1435 update_density_first_ = true;
1436 }
1437