1c
2c     $Id$
3c
4
5*  ************************************************************
6*  *                                                          *
7*  *             Band by Band Kohn-Sham Minimizer             *
8*  *                                                          *
9*  *                                                          *
10*  *                                                          *
11*  ************************************************************
12
13      subroutine bybminimize(E,deltae,deltac,current_iteration,
14     >                       set_iterations,iterations,failed)
15      implicit none
16      real*8     E(*)
17      real*8     deltae,deltac
18      integer    current_iteration
19      logical    set_iterations
20      integer    iterations
21      logical    failed
22
23
24#include "stdio.fh"
25#include "bafdecls.fh"
26#include "util.fh"
27
28*     **** local variables ****
29      integer MAX_SD_COUNT
30      parameter (MAX_SD_COUNT = 3)
31      integer MASTER,taskid
32      parameter (MASTER=0)
33
34      real*8  deltat_min
35      parameter (deltat_min=1.0d-3)
36
37      integer vall_in(2),vall_out(2),vall_junk(2),rho_in(2)
38      real*8  E0,dE0,deltae_old,Ein,deltae_history(10)
39      real*8  ks_deltae,deltav,dV,deltav_old,diis_error,e00
40      integer nx,ny,nz,stalled_count,sd_count
41
42
43      real*8     tole,tolc
44      real*8     ehartree,eorbit,exc,pxc,eion
45      real*8     Enew,Eold,alpha
46      !common / cgsd_block / Enew,Eold,alpha
47
48      integer it,it_in,i,ispin,bfgscount,icount,sd_it,cg_it
49      integer maxit_orbs
50
51      logical value,precondition,done,stalled,deltav_bad(4),oprint
52      logical ks_block
53      integer n2ft3d,n2ft3d_map
54      !real*8  e_ionmm,e_qmmm,e_mmmm,e_pol,e_vib,e_cav
55      !real*8  e_qmmm_e,e_qmmm_q,e_qmmm_lj,e_mmmm_q,e_mmmm_lj
56      real*8 e_lj,e_q,e_spring
57      real*8 ehfx,phfx
58
59      logical cosmo_on,cosmo1_on,V_APC_on,field_exist
60      real*8  eapc,papc
61
62*     **** external functions ****
63      logical control_print
64      integer  control_ispin,control_scf_algorithm,control_ks_algorithm
65      integer  control_it_in,control_it_out,psi_ne,control_version
66      real*8   control_tole,control_tolc,control_ks_alpha
67      real*8   rho_error,psi_1energy,psi_error
68      real*8   dng_1ehartree,lattice_omega
69      real*8   psi_1ke
70      real*8   psi_1vl,psi_1v_field,dng_1vl_mm
71      real*8   psi_1vnl
72      real*8   rho_1exc
73      real*8   rho_1pxc
74      real*8   ewald_e,ion_ion_e
75      real*8   psi_1eorbit
76
77      external control_print
78      external control_ispin,control_scf_algorithm,control_ks_algorithm
79      external control_it_in,control_it_out,psi_ne,control_version
80      external control_tole,control_tolc,control_ks_alpha
81      external rho_error,psi_1energy,psi_error
82      external dng_1ehartree,lattice_omega
83      external psi_1ke
84      external psi_1vl,psi_1v_field,dng_1vl_mm
85      external psi_1vnl
86      external rho_1exc
87      external rho_1pxc
88      external ewald_e,ion_ion_e
89      external psi_1eorbit
90
91*     ***** QM/MM external functions ****
92      logical  pspw_qmmm_found
93      real*8   pspw_qmmm_LJ_E,pspw_qmmm_Q_E,pspw_qmmm_spring_E
94      real*8   pspw_qmmm_LJ_Emix,pspw_qmmm_Q_Emix
95      external pspw_qmmm_found
96      external pspw_qmmm_LJ_E,pspw_qmmm_Q_E,pspw_qmmm_spring_E
97      external pspw_qmmm_LJ_Emix,pspw_qmmm_Q_Emix
98
99*     ***** pspw_charge external functions ****
100      logical  pspw_charge_found,control_precondition,pspw_HFX
101      real*8   pspw_charge_Energy_ion,pspw_charge_Energy_charge
102      external pspw_charge_found,control_precondition,pspw_HFX
103      external pspw_charge_Energy_ion,pspw_charge_Energy_charge
104      logical  pspw_Efield_found
105      external pspw_Efield_found
106      real*8   pspw_Efield_Energy_ion
107      external pspw_Efield_Energy_ion
108
109      real*8   psi_1_noupdate_energy,psi_eigenvalue
110      external psi_1_noupdate_energy,psi_eigenvalue
111      logical  psp_U_psputerm,meta_found
112      external psp_U_psputerm,meta_found
113      logical  nwpw_meta_gga_on,ion_disp_on
114      external nwpw_meta_gga_on,ion_disp_on
115      real*8   psi_1meta_gga_pxc,ion_disp_energy
116      external psi_1meta_gga_pxc,ion_disp_energy
117
118      logical  nwpw_cosmo_on,nwpw_cosmo1_on,pspw_V_APC_on
119      external nwpw_cosmo_on,nwpw_cosmo1_on,pspw_V_APC_on
120      real*8   psi_1vl_cosmo,nwpw_cosmo_EQionq,nwpw_cosmo_Eqq
121      external psi_1vl_cosmo,nwpw_cosmo_EQionq,nwpw_cosmo_Eqq
122
123      integer  control_ks_maxit_orb,control_ks_maxit_orbs
124      external control_ks_maxit_orb,control_ks_maxit_orbs
125      integer  control_diis_histories
126      external control_diis_histories
127
128
129      Ein = E(1)
130      call Parallel_taskid(taskid)
131      oprint = (taskid.eq.MASTER).and.control_print(print_medium)
132      cosmo_on  = nwpw_cosmo_on()
133      cosmo1_on = nwpw_cosmo1_on()
134      V_APC_on  = pspw_V_APC_on()
135      field_exist = pspw_charge_found().or.pspw_Efield_found()
136
137      call D3dB_nx(1,nx)
138      call D3dB_ny(1,ny)
139      call D3dB_nz(1,nz)
140      dV = lattice_omega()/dble(nx*ny*nz)
141      if (set_iterations) then
142        it_in = iterations
143        sd_it = 2
144        cg_it = 1
145      else
146        it_in = control_it_in()*control_it_out()
147        sd_it = 10
148        cg_it = 10
149      end if
150      maxit_orbs = control_ks_maxit_orbs()
151      tole  = control_tole()
152      tolc  = control_tolc()
153      precondition = control_precondition()
154      ispin = control_ispin()
155      deltav_old = 10.0d0
156      deltav     = 0.0d0
157
158      stalled       = .false.
159      deltae_history(1) = 0.0d0
160      deltae_history(2) = 0.0d0
161      deltae_history(3) = 0.0d0
162      deltae_history(4) = 0.0d0
163      stalled_count     = 0
164      sd_count          = 0
165
166      call D3dB_n2ft3d(1,n2ft3d)
167      call D3dB_n2ft3d_map(1,n2ft3d_map)
168
169*     **** allocate rho_in and rho_out ****
170      value = BA_push_get(mt_dbl,2*n2ft3d,
171     >                     'vall_in',vall_in(2),vall_in(1))
172      value = value.and.
173     >        BA_push_get(mt_dbl,2*n2ft3d,
174     >                     'vall_out',vall_out(2),vall_out(1))
175      value = value.and.
176     >        BA_push_get(mt_dbl,2*n2ft3d,
177     >                   'vall_junk',vall_junk(2),vall_junk(1))
178      value = value.and.
179     >        BA_push_get(mt_dbl,2*n2ft3d,
180     >                     'rho_in',rho_in(2),rho_in(1))
181      if (.not. value)
182     >   call errquit('bybminimize:out of stack memory',0,0)
183c      call dcopy(2*n2ft3d,0.0d0,0,dbl_mb(vall_in(1)),1)
184c      call dcopy(2*n2ft3d,0.0d0,0,dbl_mb(vall_out(1)),1)
185c      call dcopy(2*n2ft3d,0.0d0,0,dbl_mb(vall_junk(1)),1)
186c      call dcopy(2*n2ft3d,0.0d0,0,dbl_mb(rho_in(1)),1)
187      call Parallel_shared_vector_zero(.false.,2*n2ft3d,
188     >                                 dbl_mb(vall_in(1)))
189      call Parallel_shared_vector_zero(.false.,2*n2ft3d,
190     >                                 dbl_mb(vall_out(1)))
191      call Parallel_shared_vector_zero(.false.,2*n2ft3d,
192     >                                 dbl_mb(vall_junk(1)))
193      call Parallel_shared_vector_zero(.true.,2*n2ft3d,
194     >                                 dbl_mb(rho_in(1)))
195
196
197
198*     **** ion-ion energy ****
199      eion = 0.0d0
200      if (control_version().eq.3) eion = ewald_e()
201      if (control_version().eq.4) eion = ion_ion_e()
202
203
204*     **********************
205*     **** bybminimizer ****
206*     **********************
207
208
209*     **** set the initial density ****
210      if (current_iteration.eq.1) then
211         Enew   = psi_1energy()
212         alpha = control_ks_alpha()
213         deltae = -9232323299.0d0
214         ks_deltae = tole
215         call electron_gen_vall()
216         call electron_get_vall(dbl_mb(vall_out(1)))
217         call electron_get_vall(dbl_mb(vall_in(1)))
218
219         call psi_1gen_hml()
220         call psi_diagonalize_hml_assending()
221         call psi_1rotate2()
222         call psi_2to1()
223      else
224         call electron_get_vall(dbl_mb(vall_out(1)))
225         call electron_get_vall(dbl_mb(vall_in(1)))
226         !call psi_get_density(1,dbl_mb(rho_in(1)))
227      end if
228
229*     **** iniitialize SCF Mixing ****
230      call nwpw_scf_mixing_init(control_scf_algorithm(),alpha,
231     >                          control_diis_histories(),
232     >                          ispin,n2ft3d,dbl_mb(vall_out(1)))
233
234*     **** iniitialize RMM-DIIS ****
235      if (control_ks_algorithm().eq.1) call pspw_rmmdiis_init(5)
236
237*     **** iniitialize blocked cg ****
238      ks_block = .false.
239      if (control_ks_algorithm().eq.-1)  then
240         ks_block = .true.
241         call linesearch_maxiter_set(control_ks_maxit_orb())
242      end if
243
244
245
246*     ***** diis loop ****
247      it   = 0
248 2    it   = it + 1
249
250*     **** diaganolize KS matrix ****
251      if (ks_block) then
252         call psi_KS_block_update(e00,deltae,it,maxit_orbs,ks_deltae)
253      else
254         call psi_KS_update(1,
255     >                      control_ks_algorithm(),
256     >                      precondition,
257     >                      ks_deltae)
258      end if
259
260c      call psi_KS_update(1,
261c     >                   control_ks_algorithm(),
262c     >                   precondition,
263c     >                   ks_deltae)
264
265
266      call rho_1to2()
267      Eold = Enew
268      Enew = psi_1energy()
269
270      deltae = Enew-Eold
271
272      call electron_gen_vall()
273      call electron_get_vall(dbl_mb(vall_in(1)))
274
275*     **** compute deltaV ****
276c      call dcopy(ispin*n2ft3d_map,
277c     >           dbl_mb(vall_in(1)),1,
278c     >           dbl_mb(vall_junk(1)),1)
279      call Parallel_shared_vector_copy(.true.,ispin*n2ft3d,
280     >                                 dbl_mb(vall_in(1)),
281     >                                 dbl_mb(vall_junk(1)))
282      call DAXPY_OMP(ispin*n2ft3d_map,
283     >              (-1.0d0),
284     >              dbl_mb(vall_out(1)),1,
285     >              dbl_mb(vall_junk(1)),1)
286      call D3dB_rr_dot(1,dbl_mb(vall_junk(1)),
287     >                   dbl_mb(vall_junk(1)),deltav)
288      if (ispin.gt.1) then
289         call D3dB_rr_dot(1,dbl_mb(vall_junk(1)+n2ft3d),
290     >                      dbl_mb(vall_junk(1)+n2ft3d),e00)
291         deltav = deltav + e00
292      end if
293c      deltav = ddot(ispin*n2ft3d_map,
294c     >                 dbl_mb(vall_junk(1)),1,
295c     >                 dbl_mb(vall_junk(1)),1)
296c      call D3dB_SumAll(deltav)
297      deltav = deltav*dV
298
299
300
301*     **** update vall using density mixing ****
302c      if ((it.le.0) .or.
303c     >    ((dabs(deltae).lt.1.0d1) .and.
304c     >    (deltav       .lt.1.0d1) .and.
305c     >    (.not.stalled          ))) then
306      if ((it.le.0) .or.
307     >    ((deltae.lt.0.0d0) .and.
308     >    (.not.stalled          ))) then
309
310         call nwpw_scf_mixing(dbl_mb(vall_in(1)),dbl_mb(vall_out(1)),
311     >                        deltae,diis_error)
312
313*     **** bad convergence - try fixed step steepest descent ****
314      else
315
316  30     call sdminimize(sd_it)
317         sd_count = sd_count + 1
318         Eold   = Enew
319         Enew   = psi_1energy()
320
321c         if ((Enew.gt.Eold).or.(dabs(Enew-Eold).gt.1.0d-1)) go to 30
322         if ((Enew.gt.Eold).and.(sd_count.lt.MAX_SD_COUNT)) go to 30
323
324c         call dcopy(ispin*n2ft3d,
325c     >              dbl_mb(vall_out(1)),1,
326c     >              dbl_mb(vall_junk(1)),1)
327         call Parallel_shared_vector_copy(.true.,ispin*n2ft3d,
328     >                                    dbl_mb(vall_out(1)),
329     >                                    dbl_mb(vall_junk(1)))
330         call electron_gen_vall()
331         call electron_get_vall(dbl_mb(vall_out(1)))
332         call nwpw_scf_mixing_reset(dbl_mb(vall_out(1)))
333
334
335         call DAXPY_omp(ispin*n2ft3d,
336     >                  (-1.0d0),
337     >                  dbl_mb(vall_out(1)),1,
338     >                  dbl_mb(vall_junk(1)),1)
339         call D3dB_rr_dot(1,dbl_mb(vall_junk(1)),
340     >                   dbl_mb(vall_junk(1)),deltav)
341         if (ispin.gt.1) then
342            call D3dB_rr_dot(1,dbl_mb(vall_junk(1)+n2ft3d),
343     >                         dbl_mb(vall_junk(1)+n2ft3d),e00)
344            deltav = deltav + e00
345         end if
346c         deltav = ddot(ispin*n2ft3d,
347c     >                 dbl_mb(vall_junk(1)),1,
348c     >                 dbl_mb(vall_junk(1)),1)
349c         call D3dB_SumAll(deltav)
350         deltav = deltav*dV
351
352         call psi_1gen_hml()
353         call psi_diagonalize_hml_assending()
354         call psi_1rotate2()
355         call psi_2to1()
356
357         stalled       = .false.
358         deltae_history(1) = 0.0d0
359         deltae_history(2) = 0.0d0
360         deltae_history(3) = 0.0d0
361         deltae_history(4) = 0.0d0
362         stalled_count     = 0
363
364      end if
365      call electron_set_vall(dbl_mb(vall_out(1)))
366
367
368*     **** tolerance checks ****
369      deltae = Enew-Eold
370      deltac = rho_error()
371      E(1)   = Enew+eion
372
373      if ((oprint).and.(.not.set_iterations)) then
374        write(luout,1310) it,E(1),deltae,deltac,deltav
375        call util_flush(luout)
376      end if
377 1310 FORMAT(I8,E20.10,3E15.5)
378
379
380      !**** set ks_deltae ****
381      ks_deltae = 0.001d0*dabs(deltae)
382      if (ks_deltae.lt.(0.1d0*tole)) ks_deltae = 0.1d0*tole
383      if (ks_deltae.gt.1.0d-4) ks_deltae = 1.0d-4
384      !ks_deltae = 0.1d0*tole
385
386
387
388      deltav_old = deltav
389
390      deltae_history(1)    = deltae_history(2)
391      deltae_history(2)    = deltae_history(3)
392      deltae_history(3)    = deltae_history(4)
393      deltae_history(4)    = deltae
394
395      if (stalled_count .gt.4) then
396        stalled = (deltae_history(4)
397     >            +deltae_history(3)
398     >            +deltae_history(2)
399     >            +deltae_history(1)).gt.0.0d0
400      else
401         stalled = .false.
402      end if
403      stalled_count = stalled_count + 1
404c      stalled = .false.
405      if (deltae.gt.0.0d0) stalled = .true.
406
407      precondition = precondition.and.(dabs(deltae).gt.1*tole)
408
409      done = ( (    (dabs(deltae).lt.tole)
410     >         .and.(deltae.lt.0.0d0)
411     >         .and.(deltac      .lt.tolc))
412     >       .or. (it.ge.it_in)
413     >       .or. (sd_count.ge.MAX_SD_COUNT))
414
415      if (.not.done) go to 2
416
417
418
419*     **** free memory ****
420      call nwpw_scf_mixing_end()
421      if (control_ks_algorithm().eq.1) call pspw_rmmdiis_end()
422      value =           BA_pop_stack(rho_in(2))
423      value = value.and.BA_pop_stack(vall_junk(2))
424      value = value.and.BA_pop_stack(vall_out(2))
425      value = value.and.BA_pop_stack(vall_in(2))
426      if (.not. value)
427     >  call errquit('bybminimize: popping stack',1,0)
428
429c      call psi_check()
430
431
432*     **** set return energies **** - This is duplicate code
433      !Enew     = psi_1energy()
434      eorbit   = psi_1eorbit()
435      ehartree = dng_1ehartree()
436      exc      = rho_1exc()
437      pxc      = rho_1pxc()
438
439      E(1)  = Enew + eion
440      E(2)  = eorbit
441      E(3)  = ehartree
442      E(4)  = exc
443      E(5)  = eion
444      E(6)  = psi_1ke()
445      E(7)  = psi_1vl()
446      E(8)  = psi_1vnl()
447      E(9)  = 2.0d0*ehartree
448      E(10) = pxc
449      if (pspw_qmmm_found()) then
450         e_lj     = pspw_qmmm_LJ_E()
451         e_q      = pspw_qmmm_Q_E()
452         e_spring = pspw_qmmm_spring_E()
453         E(1)  = E(1) + e_lj + e_q + e_spring
454
455         E(11) = e_lj
456         E(12) = e_q
457         E(13) = e_spring
458
459*        **** Eqm-mm energy ***
460         E(14) = pspw_qmmm_LJ_Emix()
461         E(14) = E(14) + pspw_qmmm_Q_Emix()
462         E(14) = E(14) + dng_1vl_mm()
463
464      end if
465
466*     **** COSMO terms ****
467      if (cosmo_on) then
468
469         !*** cosmo1 ****
470         if (cosmo1_on) then
471            E(46) = psi_1vl_cosmo()
472            E(47) = nwpw_cosmo_EQionq()
473            E(48) = nwpw_cosmo_Eqq()
474
475         !*** cosmo2 ****
476         else
477            call electron_apc_energies(eapc,papc)
478            E(22) = eapc
479            E(23) = papc
480            E(46) = eapc
481            E(47) = nwpw_cosmo_EQionq() !** E(Qion|q)
482            E(48) = nwpw_cosmo_Eqq()    !** E(q|q)
483
484            !E(1)  = E(1) + E(22) - E(23) + E(47) + E(48)
485            E(1)  = E(1) + E(22) - E(23) !*** probably NOT CORRECT ***
486         end if
487
488      else if (V_APC_on) then
489         call electron_apc_energies(eapc,papc)
490         E(22) = eapc
491         E(23) = papc
492         E(1)  = E(1) + eapc - papc !*** probably NOT CORRECT ***
493      end if
494
495
496*     **** get pspw_charge pspw_Efield energies ****
497      if (field_exist) then
498         E(49)  = psi_1v_field()
499         E(50)  = pspw_charge_Energy_ion()
500     >          + pspw_Efield_Energy_ion()
501         E(51)  = pspw_charge_Energy_charge()
502         E(1)   = E(1) + E(50) + E(51)
503      end if
504
505*     **** HFX terms ****
506      if (pspw_HFX()) then
507         call electron_HFX_energies(ehfx,phfx)
508         E(26) = ehfx
509         E(27) = phfx
510      end if
511
512*     **** DFT+U terms ****
513      if (psp_U_psputerm()) then
514         call electron_U_energies(ehfx,phfx)
515         E(29) =  ehfx
516         E(30) =  phfx
517      end if
518
519*     **** Metadynamics potential terms ****
520      if (meta_found()) then
521         call electron_meta_energies(ehfx,phfx)
522         E(31) =  ehfx
523         E(32) =  phfx
524      end if
525
526*     **** Metadynamics GGA Tau potential term ****
527      if (nwpw_meta_gga_on()) then
528         E(10) = E(10) - psi_1meta_gga_pxc()
529      end if
530
531*     **** Dispersion energy ****
532      if (ion_disp_on()) then
533         E(33) = ion_disp_energy()
534         E(1)  = E(1) + E(33)
535      end if
536
537
538      failed = (sd_count.ge.MAX_SD_COUNT)
539
540      return
541      end
542
543
544