1*
2* $Id$
3*
4*  ************************************************************
5*  *                MPI cpmd routine                          *
6*  *                                                          *
7*  *  This is a developing cpmdv3 parallel code wrtten in     *
8*  *  Fortran and MPI.                                        *
9*  *                                                          *
10*  *    + mpl message passing library used                    *
11*  *                                                          *
12*  *    + ngp is used instead of nfft in this proceudure      *
13*  *                                                          *
14*  *    + error checking is based on aimd.h parameters        *
15*  *      then control file                                   *
16*  ************************************************************
17
18#ifdef USE_OPENMP
19#define PSI_OMP
20#endif
21
22      subroutine inner_loop_md(verlet,sa_alpha,
23     >                      ispin,ne,neq,
24     >                      npack1,nfft3d,nemaxq,
25     >                      psi0,psi1,psi2,dn,
26     >                      it_in,it_sum,E,
27     >                      hml,lmd,lmd1,
28     >                      Hpsi,psi_r,
29     >                      calc_pressure,pressure,p1,p2,
30     >                      fractional,occ0,occ1,occ2)
31#ifdef PSI_OMP
32      use omp_lib
33#endif
34      implicit none
35      logical    verlet
36      real*8     sa_alpha(2)
37      integer    ispin,ne(2),neq(2)
38      integer    npack1,nfft3d,nemaxq
39      complex*16 psi0(npack1,nemaxq)
40      complex*16 psi1(npack1,nemaxq)
41      complex*16 psi2(npack1,nemaxq)
42      real*8     dn(2*nfft3d,2)
43      integer    it_in,it_sum
44      real*8     E(*)
45      real*8     hml(*),lmd(*),lmd1(*)
46
47*     **** very big workspace variables ****
48      complex*16 Hpsi(npack1,nemaxq)
49      real*8     psi_r(2*nfft3d,nemaxq)
50
51      logical calc_pressure
52      real*8  pressure,p1,p2,stress(3,3)
53
54      logical fractional
55      real*8 occ0(*),occ1(*),occ2(*)
56
57#include "bafdecls.fh"
58#include "errquit.fh"
59ccccccc#include "frac_occ.fh"
60
61
62*     **** local variables ****
63      logical move,fei
64      integer n2ft3d,np,np_i,np_j,taskid
65      integer i,j,ii,jj,n,n1(2),n2(2),it,ms,nn,ierr,gga
66      integer nx,ny,nz
67      integer index,indext
68      real*8  sum,Eold,eorbit,eion,ehartr,eke,eki,sse,ssr,sa1,sa2
69      real*8  exc,exc2,pxc,pxc2,dte,dte0,scal1,scal2,dv,dt,fmass,h
70      real*8  ehsic,phsic,exsic,pxsic,ehfx,phfx,espring,enlocal,elocal
71      !real*8  e_ionmm,e_qmmm,e_mmmm,e_pol,e_vib,e_cav
72      real*8   e_lj,e_q,e_spring,Eapc,Papc
73      real*8   ehartree_atom,ecmp_cmp,ecmp_pw,exc_atom,pxc_atom
74      real*8  s,r
75      integer tid,nthreads
76
77
78*     **** MA local variables ****
79      logical value,nose,field_exist,sic,allow_translation
80      logical V_APC_on
81*     real*8     tmp_L(8*nemax*nemax)
82*     complex*16 tmp1(nfft3d)
83*     complex*16 tmp2(nfft3d)
84c     complex*16  vl(nfft3d)
85c     complex*16  vc(nfft3d)
86c     complex*16 dng(nfft3d)
87c     real*8     xcp(2*nfft3d,2)
88c     real*8     xce(2*nfft3d,2)
89c     real*8     fion(3,natmx)
90      integer tmp_L(2)
91      integer tmp1(2),tmp2(2)
92      integer vl(2),vc(2),dng(2)
93      integer rho(2),vlr_l(2),r_grid(2)
94      integer xcp(2),xce(2),dnall(2)
95      integer v_field(2)
96      integer natmx,fion(2),ftest(2)
97      integer npack0,nion1
98
99*     ***** external functions ****
100      integer  ion_nion,control_gga
101      real*8   ion_ke,ion_ion_e,E_vnonlocal
102      real*8   control_time_step,control_fake_mass,ion_dti
103      real*8   lattice_omega,coulomb_e,ewald_e
104      external ion_nion,control_gga
105      external ion_ke,ion_ion_e,E_vnonlocal
106      external control_time_step,control_fake_mass,ion_dti
107      external lattice_omega,coulomb_e,ewald_e
108      logical  psp_semicore
109      external psp_semicore
110      integer  control_version
111      external control_version
112      logical  control_Nose,control_Fei
113      external control_Nose,control_Fei
114      real*8   Nose_e_energy,Nose_r_energy,Nose_sse,Nose_ssr
115      real*8   Nose_dXe,Nose_dXr
116      external Nose_e_energy,Nose_r_energy,Nose_sse,Nose_ssr
117      external Nose_dXe,Nose_dXr
118
119*     ***** QM/MM external functions ****
120      logical  pspw_qmmm_found
121      real*8   pspw_qmmm_LJ_E,pspw_qmmm_Q_E,pspw_qmmm_spring_E
122      real*8   pspw_qmmm_LJ_Emix,pspw_qmmm_Q_Emix
123      external pspw_qmmm_found
124      external pspw_qmmm_LJ_E,pspw_qmmm_Q_E,pspw_qmmm_spring_E
125      external pspw_qmmm_LJ_Emix,pspw_qmmm_Q_Emix
126
127*     ***** pspw_charge external functions ****
128      logical  pspw_charge_found,pspw_bqext
129      external pspw_charge_found,pspw_bqext
130      integer  pspw_charge_nion
131      external pspw_charge_nion
132      real*8   pspw_charge_Energy_ion,pspw_charge_Energy_charge
133      external pspw_charge_Energy_ion,pspw_charge_Energy_charge
134      real*8   electron_psi_v_field_ave
135      external electron_psi_v_field_ave
136      logical  pspw_Efield_found
137      external pspw_Efield_found
138      real*8   pspw_Efield_Energy_ion
139      external pspw_Efield_Energy_ion
140
141
142      logical  dplot_iteration_check
143      external dplot_iteration_check
144
145      logical  pspw_SIC,pspw_SIC_relaxed,control_allow_translation
146      logical  pspw_HFX,pspw_HFX_relaxed
147      external pspw_SIC,pspw_SIC_relaxed,control_allow_translation
148      external pspw_HFX,pspw_HFX_relaxed
149
150      double precision Dneall_m_trace
151      external         Dneall_m_trace
152      logical  Dneall_m_push_get_block,Dneall_m_pop_stack
153      external Dneall_m_push_get_block,Dneall_m_pop_stack
154
155      logical  meta_found,tamd_found,psp_U_psputerm
156      external meta_found,tamd_found,psp_U_psputerm
157      logical  nwpw_meta_gga_on,ion_disp_on
158      external nwpw_meta_gga_on,ion_disp_on
159      real*8   nwpw_meta_gga_pxc,ion_disp_energy
160      external nwpw_meta_gga_pxc,ion_disp_energy
161
162*     ***** PAW external functions ****
163      logical  psp_pawexist
164      external psp_pawexist
165      real*8   psp_hartree_atom,psp_hartree_cmp_cmp,psp_hartree_cmp_pw
166      external psp_hartree_atom,psp_hartree_cmp_cmp,psp_hartree_cmp_pw
167      real*8   psp_kinetic_core,psp_ion_core
168      external psp_kinetic_core,psp_ion_core
169      integer  Parallel_threadid,Parallel_nthreads
170      external Parallel_threadid,Parallel_nthreads
171
172*     **** cosmo external functions ****
173      logical  pspw_V_APC_on
174      external pspw_V_APC_on
175
176
177#ifdef PSI_OMP
178      integer nida,nidb
179      integer  mthr
180
181      real*8  adiff
182      logical notgram
183
184      integer MASTER
185      parameter (MASTER=0)
186      integer  Parallel_maxthreads
187      external Parallel_maxthreads
188      integer thrlmbda(2)
189
190      INTEGER(kind=omp_nest_lock_kind) reduce_lock1
191      INTEGER(kind=omp_nest_lock_kind) reduce_lock2
192      INTEGER(kind=omp_nest_lock_kind) reduce_lock3
193      common / reduce_ffm / reduce_lock1,reduce_lock2,reduce_lock3
194
195*     **** matrix_blocking common block ****
196      integer mblock(2),nblock(2),algorithm(2)
197      common /matrix_blocking/ mblock,nblock,algorithm
198#endif
199
200
201
202      call Parallel_np(np)
203      call Parallel_taskid(taskid)
204      call Pack_npack(0,npack0)
205
206      n2ft3d = 2*nfft3d
207      field_exist       = pspw_charge_found().or.pspw_Efield_found()
208      sic               = pspw_SIC()
209      gga               = control_gga()
210      allow_translation = control_allow_translation()
211      fei = control_Fei()
212      V_APC_on  = pspw_V_APC_on()
213
214*     **** allocate MA local variables ****
215      call nwpw_timing_start(12)
216      value = Dneall_m_push_get_block(1,8,tmp_L)
217      value = value.and.
218     >        BA_push_get(mt_dcpl,(nfft3d),'tmp1',tmp1(2),tmp1(1))
219      value = value.and.
220     >        BA_push_get(mt_dcpl,(nfft3d),'tmp2',tmp2(2),tmp2(1))
221
222      if (control_version().eq.3) then
223      value = value.and.
224     >        BA_push_get(mt_dcpl,(npack0),'vc',  vc(2),  vc(1))
225      end if
226
227      if (control_version().eq.4) then
228       value = value.and.
229     >        BA_push_get(mt_dbl,(n2ft3d),'vc',vc(2),vc(1))
230
231       value = value.and.
232     >        BA_push_get(mt_dbl,(n2ft3d),'vlr_l',vlr_l(2),vlr_l(1))
233      end if
234
235      if ((control_version().eq.4).or.(field_exist)) then
236       value = value.and.
237     >    BA_push_get(mt_dbl,(3*n2ft3d),'r_grid',r_grid(2),r_grid(1))
238      end if
239
240      value = value.and.
241     >  BA_push_get(mt_dbl,(n2ft3d),'v_field',v_field(2),v_field(1))
242
243      value = value.and.
244     >         BA_push_get(mt_dcpl,(npack0),'vl',  vl(2),  vl(1))
245      value = value.and.
246     >        BA_push_get(mt_dbl,(n2ft3d),'rho',rho(2), rho(1))
247      value = value.and.
248     >        BA_push_get(mt_dcpl,(npack0),'dng',dng(2), dng(1))
249      value = value.and.
250     >        BA_push_get(mt_dbl,(4*nfft3d),'xcp',xcp(2), xcp(1))
251      value = value.and.
252     >        BA_push_get(mt_dbl,(4*nfft3d),'xce',xce(2), xce(1))
253      value = value.and.
254     >        BA_push_get(mt_dbl,(4*nfft3d),'dnall',dnall(2),dnall(1))
255      natmx = ion_nion()
256      if (pspw_charge_found().and.
257     >    (.not.pspw_bqext())) natmx = natmx + pspw_charge_nion()
258      value = value.and.
259     >        BA_push_get(mt_dbl,(3*natmx),'fion',fion(2),fion(1))
260      value = value.and.
261     >        BA_push_get(mt_dbl,(3*natmx),'ftest',ftest(2),ftest(1))
262      call Parallel_shared_vector_zero(.false.,3*natmx,dbl_mb(fion(1)))
263      call Parallel_shared_vector_zero(.true.,3*natmx,dbl_mb(ftest(1)))
264
265#ifdef PSI_OMP
266      mthr = Parallel_maxthreads()
267      mthr = max(mthr,4)
268
269      call Parallel_np(np)
270      call Parallel_taskid(taskid)
271      nida = 0
272      if (taskid.eq.MASTER) nida = 1
273      nidb = npack1-nida
274
275      !write(*,*) mthr,ne(1)
276
277c      value = value .and. MA_set_auto_verify(.true.)
278      value = value.and.
279     >        BA_push_get(mt_dbl,(mthr*ne(1)*ne(1)*8),'thrlmbda',
280     >                                     thrlmbda(2),thrlmbda(1))
281
282      call omp_init_nest_lock(reduce_lock1)
283      call omp_init_nest_lock(reduce_lock2)
284      call omp_init_nest_lock(reduce_lock3)
285*     **** define blocking for dgemm matrix multiply ****
286      algorithm(1) = -1
287      algorithm(2) = -1
288      do ms=1,ispin
289         if (ne(ms).gt.(mthr*np)) then
290            algorithm(ms) = 1
291            call Parallel_matrixblocking(mthr*np,ne(ms),ne(ms),
292     >                                mblock(ms),nblock(ms))
293         else if (ne(ms).gt.(mthr)) then
294            algorithm(ms) = 0
295            call Parallel_matrixblocking(mthr,ne(ms),ne(ms),
296     >                                mblock(ms),nblock(ms))
297         else
298            algorithm(ms) = -1
299         end if
300      end do
301
302#endif
303      if (.not.value)
304     > call errquit('inner_loop_md:pushing stack',0, MA_ERR)
305
306      call Parallel_shared_vector_zero(.false.,
307     >                           4*nfft3d,dbl_mb(dnall(1)))
308      call Parallel_shared_vector_zero(.false.,4*nfft3d,dbl_mb(xcp(1)))
309      call Parallel_shared_vector_zero(.true.,4*nfft3d,dbl_mb(xce(1)))
310
311      call nwpw_timing_end(12)
312
313      call D3dB_nx(1,nx)
314      call D3dB_ny(1,ny)
315      call D3dB_nz(1,nz)
316      move = .true.
317
318      nose = control_Nose()
319      sse = 1.0d0
320      ssr = 1.0d0
321
322      n1(1) = 1
323      n2(1) = neq(1)
324      n1(2) = neq(1) + 1
325      n2(2) = neq(1) + neq(2)
326
327      dt    = control_time_step()
328      fmass = control_fake_mass()
329      dte   = dt*dt/fmass
330      if (.not. verlet) dte=0.5d0*dte
331      h = 1.0d0/(2.0d0*dt)
332
333      sa1 = 1.0d0
334      sa2 = 1.0d0
335      if (.not.nose) then
336        sa1 =    1.0d0/(2.0d0-sa_alpha(1))
337        sa2 = sa_alpha(1)/(2.0d0-sa_alpha(1))
338      end if
339
340      scal1 = 1.0d0/dble(nx*ny*nz)
341      scal2 = 1.0d0/lattice_omega()
342      dv    = scal1*lattice_omega()
343
344      if ((control_version().eq.4).or.(field_exist))
345     >   call lattice_r_grid(dbl_mb(r_grid(1)))
346
347      espring = 0.0d0
348
349
350*     ******************************************
351*     ****                                  ****
352*     **** Start of molecular dynamics loop ****
353*     ****                                  ****
354*     ******************************************
355!$OMP PARALLEL private(n,i,ms,it,tid,nthreads,r,s,dte0)
356      tid      = Parallel_threadid()
357      nthreads = Parallel_nthreads()
358      do it=1,it_in
359        !call dcopy(2*npack1*nemaxq,psi1,1,psi0,1)
360        !call dcopy(2*npack1*nemaxq,psi2,1,psi1,1)
361        call Pack_c_nCopy(1,nemaxq,psi1,psi0)
362        call Pack_c_nCopy(1,nemaxq,psi2,psi1)
363*       *** skip ion_shift if newton step ***
364        if (verlet) call ion_shift()
365        if (nose.and.verlet) call Nose_shift()
366
367
368*       ********************************
369*       **** generate phaze factors ****
370*       ********************************
371        call phafac()
372        if (control_version().eq.3) call ewald_phafac()
373        if (psp_pawexist()) call nwpw_gintegrals_set(.true.)
374
375        call nwpw_timing_start(11)
376*       *******************
377*       **** get psi_r ****
378*       *******************
379c        do n=n1(1),n2(ispin)
380c           call Pack_c_Copy(1,psi1(1,n),psi_r(1,n))
381c           call Pack_c_unpack(1,psi_r(1,n))
382c           call D3dB_cr_fft3b(1,psi_r(1,n))
383c           call D3dB_r_Zero_Ends(1,psi_r(1,n))
384c        end do
385
386!$OMP DO private(n)
387        do n=n1(1),n2(ispin)
388           call Pack_c_Copy0(1,psi1(1,n),psi_r(1,n))
389        end do
390!$OMP END DO
391
392        call Grsm_gh_fftb(nfft3d,n2(ispin),psi_r)
393!$OMP BARRIER
394
395!$OMP DO private(n)
396        do n=n1(1),n2(ispin)
397           call D3dB_r_Zero_Ends0(1,psi_r(1,n))
398        end do
399!$OMP END DO
400
401        call pspw_hfx_localize2_n(3,psi_r,psi0,psi1,psi2)
402
403        call nwpw_meta_gga_gen_tau(ispin,neq,psi1)
404
405
406*       *********************
407*       **** generate dn ****
408*       *********************
409        !call dcopy(ispin*n2ft3d,0.0d0,0,dn,1)
410        call Parallel_shared_vector_zero(.true.,ispin*n2ft3d,dn)
411        if (fractional) then
412          do ms=1,ispin
413             do n=n1(ms),n2(ms)
414!$OMP DO private(i)
415                do i=1,n2ft3d
416                   dn(i,ms) = dn(i,ms)
417     >                      + scal2*(psi_r(i,n)**2)
418     >                       *occ1(n)
419                end do
420!$OMP END DO
421             end do
422             !call D3dB_r_Zero_Ends(1,dn(1,ms))
423             !call D1dB_Vector_SumAll(n2ft3d,dn(1,ms))
424          end do
425        else
426          do ms=1,ispin
427             do n=n1(ms),n2(ms)
428!$OMP DO private(i)
429                do i=1,n2ft3d
430                   dn(i,ms) = dn(i,ms) + scal2*(psi_r(i,n)**2)
431                end do
432!$OMP END DO
433             end do
434             !call D3dB_r_Zero_Ends(1,dn(1,ms))
435             !call D1dB_Vector_SumAll(n2ft3d,dn(1,ms))
436          end do
437        end if
438        call D1dB_Vector_SumAll(ispin*n2ft3d,dn)
439
440
441*       **********************
442*       **** generate dng ****
443*       **********************
444        call D3dB_rr_Sum(1,dn(1,1),dn(1,ispin),dbl_mb(rho(1)))
445        call D3dB_r_SMul(1,scal1,dbl_mb(rho(1)),dcpl_mb(tmp1(1)))
446        call D3dB_rc_fft3f(1,dcpl_mb(tmp1(1)))
447        call Pack_c_pack(0,dcpl_mb(tmp1(1)))
448        call Pack_c_Copy(0,dcpl_mb(tmp1(1)),dcpl_mb(dng(1)))
449
450
451*       ********************************************************
452*       **** generate dnall - used for semicore corrections ****
453*       ********************************************************
454        if (psp_semicore(0)) then
455           call semicore_density_update()
456           call semicore_density(dcpl_mb(tmp1(1)))
457c           call D3dB_r_SMul(1,0.5d0,dcpl_mb(tmp1(1)),dcpl_mb(tmp1(1)))
458           call D3dB_r_SMul1(1,0.5d0,dcpl_mb(tmp1(1)))
459        else
460           !call dcopy(n2ft3d,0.0d0,0,dcpl_mb(tmp1(1)),1)
461           call D3dB_r_Zero(1,dcpl_mb(tmp1(1)))
462        end if
463        do ms=1,ispin
464          call D3dB_rr_Sum(1,dn(1,ms),
465     >                     dcpl_mb(tmp1(1)),
466     >                     dbl_mb(dnall(1) +(ms-1)*n2ft3d))
467        end do
468        call nwpw_timing_end(11)
469
470
471
472
473
474*       *****************************************
475*       **** generate local pseudopotential  ****
476*       **** and also get force if move true ****
477*       *****************************************
478        call v_local(dcpl_mb(vl(1)),
479     >               move,
480     >               dcpl_mb(dng(1)),
481     >               dbl_mb(fion(1)))
482
483
484*       *** long-range psp for charge systems ***
485        if (control_version().eq.4) then
486          call v_lr_local(dbl_mb(r_grid(1)),
487     >                    dbl_mb(vlr_l(1)))
488          if (move) then
489             call grad_v_lr_local(dbl_mb(r_grid(1)),
490     >                            dbl_mb(rho(1)),
491     >                            dbl_mb(fion(1)))
492          end if
493        end if
494
495        if (V_APC_on) then
496           call pspw_V_APC(ispin,ne,
497     >                           dcpl_mb(dng(1)),
498     >                           dcpl_mb(vl(1)),
499     >                           Eapc,Papc,
500     >                           move,
501     >                           dbl_mb(fion(1)))
502        end if
503
504
505*       ************************************
506*       **** generate coulomb potential ****
507*       ************************************
508        if (control_version().eq.3)
509     >     call coulomb_v(dcpl_mb(dng(1)),dcpl_mb(vc(1)))
510
511        if (control_version().eq.4)
512     >     call coulomb2_v(dbl_mb(rho(1)),dbl_mb(vc(1)))
513
514
515*       *************************************************
516*       **** generate exchange-correlation potential ****
517*       *************************************************
518        call v_bwexc_all_tmp1(gga,n2ft3d,ispin,
519     >                        dbl_mb(dnall(1)),
520     >                        dbl_mb(xcp(1)),
521     >                        dbl_mb(xce(1)),
522     >                        dcpl_mb(tmp1(1)))
523
524
525*       **********************************************
526*       **** generate other real-space potentials ****
527*       **********************************************
528        if (field_exist) then
529
530           !call dcopy(n2ft3d,0.0d0,0,dbl_mb(v_field(1)),1)
531           call D3dB_r_Zero(1,dbl_mb(v_field(1)))
532
533*          **** generate charge potential ****
534           if (pspw_charge_found()) then
535            call pspw_charge_Generate_V(n2ft3d,
536     >                               dbl_mb(r_grid(1)),
537     >                               dbl_mb(v_field(1)))
538           end if
539
540*          **** generate Efield potential ****
541           if (pspw_Efield_found()) then
542            call pspw_Efield_Generate_V(n2ft3d,
543     >                               dbl_mb(r_grid(1)),
544     >                               dbl_mb(v_field(1)))
545           end if
546        end if
547
548
549
550*       ******************
551*       **** get Hpsi ****
552*       ******************
553        if (control_version().eq.3)
554     >  call psi_H(ispin,neq,psi1,psi_r,
555     >             dcpl_mb(vl(1)),
556     >             dbl_mb(v_field(1)),field_exist,
557     >             dcpl_mb(vc(1)),dbl_mb(xcp(1)),Hpsi,
558     >             move,dbl_mb(fion(1)),fractional,occ1)
559
560        if (control_version().eq.4)
561     >     call psi_Hv4(ispin,neq,psi1,psi_r,
562     >             dcpl_mb(vl(1)),dbl_mb(vlr_l(1)),
563     >             dbl_mb(v_field(1)),field_exist,
564     >             dbl_mb(vc(1)),dbl_mb(xcp(1)),Hpsi,
565     >             move,dbl_mb(fion(1)),fractional,occ1)
566
567
568
569*       **********************
570*       **** get ewald force *
571*       **********************
572*       **** get the ewald force ****
573        if (control_version().eq.3) call ewald_f(dbl_mb(fion(1)))
574
575*       **** get the free-space ion force ****
576        if (control_version().eq.4) call ion_ion_f(dbl_mb(fion(1)))
577
578
579*       ************************
580*       **** get semicoreforce *
581*       ************************
582        if (psp_semicore(0)) then
583           call semicore_xc_F(ispin,dbl_mb(xcp(1)),dbl_mb(fion(1)))
584        end if
585
586*       ***********************************************
587*       **** get the paw lagrange multiplier force ****
588*       ***********************************************
589        if (psp_pawexist()) then
590           if (.not.verlet) then
591              call Dneall_ffm_sym_Multiply(0,psi1,Hpsi,npack1,hml)
592              call Dneall_m_scal(0,(-1.0d0),hml)
593              call Dneall_mm_copy(0,hml,lmd)
594              call Dneall_mm_copy(0,hml,lmd1)
595           end if
596           !tmp_L = 2*lmd - lmda1
597           call Dneall_mm_copy(0,lmd,dbl_mb(tmp_L(1)))
598           call Dneall_m_scal(0,2.0d0,dbl_mb(tmp_L(1)))
599           call Dneall_mm_daxpy(0,-1.0d0,lmd1,dbl_mb(tmp_L(1)))
600           call psp_paw_overlap_fion(ispin,
601     >                          dbl_mb(tmp_L(1)),
602     >                          psi1,
603     >                          dbl_mb(fion(1)))
604        end if
605
606*       ****************************
607*       **** get the qmmm force ****
608*       ****************************
609        if (pspw_qmmm_found()) call pspw_qmmm_fion(dbl_mb(fion(1)))
610
611*       **********************************
612*       **** get the dispersion force ****
613*       **********************************
614        if (ion_disp_on()) call ion_disp_force(dbl_mb(fion(1)))
615
616*       ******************************************
617*       **** get forces from external charges ****
618*       ******************************************
619        if (pspw_charge_found()) then
620           if(pspw_bqext()) then
621              call pspw_charge_charge_Fion(dbl_mb(fion(1)))
622           else
623              nion1 = ion_nion()
624              call pspw_charge_Fion_Fcharge(dbl_mb(fion(1)),
625     >                                      dbl_mb(fion(1)+3*nion1))
626              call pspw_charge_Fcharge(dbl_mb(fion(1)+3*nion1))
627              call pspw_charge_rho_Fcharge(n2ft3d,dbl_mb(r_grid(1)),
628     >                          dbl_mb(rho(1)),
629     >                          dv,dbl_mb(fion(1)+3*nion1))
630           end if
631        end if
632
633*       *****************************************
634*       **** get forces from external Efield ****
635*       *****************************************
636        if (pspw_Efield_found()) then
637           call pspw_Efield_Fion(dbl_mb(fion(1)))
638        end if
639
640
641
642*       *****************************************
643*       **** remove ion forces using ion_FixIon *
644*       *****************************************
645        if (fei) call Parallel_shared_vector_copy(.false.,
646     >                        3*natmx,dbl_mb(fion(1)),dbl_mb(ftest(1)))
647
648        call ion_FixIon(dbl_mb(fion(1)))
649
650c        if (meta) call meta_force(ispin,neq,psi1,Hpsi,dbl_mb(fion(1)))
651
652        !**** center of mass constraint ****
653c        if (.not.allow_translation) then
654c          call remove_center_F_mass(dbl_mb(fion(1)))
655c        end if
656
657*       **************************
658*       **** do a verlet step ****
659*       **************************
660        if (verlet) then
661*          **** constant temperature ****
662           if (nose) then
663!$OMP MASTER
664             sse = Nose_sse()
665             ssr = Nose_ssr()
666!$OMP END MASTER
667!$OMP BARRIER
668             do n=1,n2(ispin)
669              call Pack_c_SMul(1,0.5d0*dte,Hpsi(1,n),psi2(1,n))
670              call Pack_cc_daxpy(1,-1.0d0,psi0(1,n),psi2(1,n))
671              call Pack_cc_daxpy(1,1.0d0,psi1(1,n),psi2(1,n))
672c              call Pack_c_SMul(1,2.0d0*sse,psi2(1,n),psi2(1,n))
673              call Pack_c_SMul1(1,2.0d0*sse,psi2(1,n))
674              call Pack_cc_daxpy(1,1.0d0,psi0(1,n),psi2(1,n))
675             end do
676             call ion_nose_step(ssr,dbl_mb(fion(1)))
677
678*          **** constant energy ****
679           else
680             do n=1,n2(ispin)
681              call Pack_c_SMul(1,dte*sa1,Hpsi(1,n),psi2(1,n))
682              call Pack_cc_daxpy(1,-1.0d0*sa2,psi0(1,n),psi2(1,n))
683              call Pack_cc_daxpy(1,2.0d0*sa1,psi1(1,n),psi2(1,n))
684             end do
685
686*            **** QM/MM Verlet update ****
687             call ion_verlet_step(dbl_mb(fion(1)),sa_alpha(2))
688           end if
689
690*       **************************
691*       **** do a newton step ****
692*       **************************
693        else
694           r = 1.0d0
695           s = 1.0d0
696           if (nose) then
697             r =  (1.0d0-0.5d0*dt*Nose_dXr())
698             s =  (1.0d0-0.5d0*dt*Nose_dXe())
699           end if
700           do n=1,n2(ispin)
701              call Pack_c_SMul(1,dte,Hpsi(1,n),psi2(1,n))
702              call Pack_cc_daxpy(1,s*dt*sa_alpha(1),psi0(1,n),psi2(1,n))
703c              call Pack_cc_Sum(1,psi2(1,n),psi1(1,n),psi2(1,n))
704              call Pack_cc_Sum2(1,psi1(1,n),psi2(1,n))
705           end do
706
707*          **** QM/MM Newton update ****
708           call ion_newton_step(dbl_mb(fion(1)),sa_alpha(2)*r)
709
710        end if
711
712
713*       *****************************************
714*       **** lagrange multiplier corrections ****
715*       *****************************************
716
717        !**** orthoganality constraint ****
718        dte0 = dte
719        if (nose.and.verlet) dte0 = dte*sse
720
721        if (psp_pawexist()) then
722           call psp_overlap_S(ispin,neq,psi1,psi_r)
723           call phafac2()
724           call Dneall_mm_copy(0,lmd,lmd1)
725           call psi_lmbda_paw(ispin,ne,nemaxq,npack1,psi_r,psi2,
726     >                        dte,
727     >                        lmd,dbl_mb(tmp_L(1)),ierr)
728        else if (fractional) then
729           call psi_lmbda2(ispin,ne,nemaxq,npack1,psi1,psi2,
730     >                     dte0,occ1,
731     >                     lmd,dbl_mb(tmp_L(1)),ierr)
732        else if (sic) then
733           call psi_lmbda_sic(ispin,ne,nemaxq,npack1,psi1,psi2,dte0,
734     >                        lmd,dbl_mb(tmp_L(1)),ierr)
735        else
736#ifdef PSI_OMP
737!           write(*,*) 'T',tid, ':', dbl_mb(thrlmbda(1))
738           call psi_lmbda_omp(ispin,ne,nemaxq,nida,nidb,psi1,psi2,dte0,
739     >                  lmd,dbl_mb(tmp_L(1)),ierr,dbl_mb(thrlmbda(1)),
740     >                                          taskid,adiff,notgram)
741#else
742           call psi_lmbda(ispin,ne,nemaxq,npack1,psi1,psi2,dte0,
743     >                    lmd,dbl_mb(tmp_L(1)),ierr)
744
745#endif
746        end if
747
748        !**** center of mass constraint ****
749
750        !**** total angular momentum constraint ****
751
752
753*       **************************
754*       *** update thermostats ***
755*       **************************
756        if (nose) then
757          if (verlet) then
758             if (psp_pawexist()) then
759!$OMP MASTER
760                eke = 0.0d0
761!$OMP END MASTER
762                do i=1,n2(ispin)
763                   call Pack_c_SMul1(1,-h,psi0(1,i))
764                   call Pack_cc_daxpy(1,h,psi2(1,i),psi0(1,i))
765                   call Pack_cc_idot(1,psi0(1,i),psi0(1,i),sum)
766!$OMP MASTER
767                   eke = eke+sum
768!$OMP END MASTER
769                end do
770                if (np.gt.1) call Parallel_SumAll(eke)
771!$OMP MASTER
772                if (ispin.eq.1) eke = 2.0d0*eke
773                eke = eke*fmass
774!$OMP END MASTER
775             else
776!$OMP MASTER
777                eke = 0.0d0
778!$OMP END MASTER
779                do i=1,n2(ispin)
780                   call Pack_cc_idot(1,psi2(1,i),psi0(1,i),sum)
781!$OMP MASTER
782                   eke = eke+sum
783!$OMP END MASTER
784                end do
785                if (np.gt.1) call Parallel_SumAll(eke)
786!$OMP MASTER
787                eke = (ne(1)+ne(2) - eke)
788                if (ispin.eq.1) eke = 2.0d0*eke
789                eke = 0.5d0*(fmass/(dt*dt))*eke
790!$OMP END MASTER
791             end if
792!$OMP MASTER
793             eki = ion_ke()
794!$OMP END MASTER
795             call Nose_Verlet_Step(eke,eki)
796          else
797!$OMP MASTER
798              eke = 0.0d0
799!$OMP END MASTER
800              do i=1,n2(ispin)
801                call Pack_cc_idot(1,psi0(1,i),psi0(1,i),sum)
802!$OMP MASTER
803                eke = eke+sum
804!$OMP END MASTER
805              end do
806              if (np.gt.1) call Parallel_SumAll(eke)
807!$OMP MASTER
808              if (ispin.eq.1) eke = 2.0d0*eke
809              eke = eke*fmass
810              eki = ion_ke()
811!$OMP END MASTER
812              call Nose_Newton_Step(eke,eki)
813          end if
814        end if
815
816
817*       ********************
818*       **** call dplot ****
819*       ********************
820        if (dplot_iteration_check(it+it_sum)) then
821         call dplot_iteration((it+it_sum),ispin,neq,psi1,dn,psi_r)
822        end if
823
824
825      end do
826*     ******************************************************
827*     ***** end main loop **********************************
828*     ******************************************************
829
830
831*     *************************************
832*     ***** total energy calculation ******
833*     *************************************
834      if (verlet) then
835         call nwpw_timing_start(10)
836         call Parallel2d_np_i(np_i)
837         call Parallel2d_np_j(np_j)
838         if (psp_pawexist()) call phafac() !*** reset phase factors to r1 ***
839
840*        *** get orbital energies ****
841         call Dneall_ffm_sym_Multiply(0,psi1,Hpsi,npack1,hml)
842         call Dneall_m_scal(0,(-1.0d0),hml)
843      end if
844!$OMP END PARALLEL
845
846*     **** if newton then skip energy calculations ****
847      if (.not. verlet) goto 333
848
849      if (fractional) then
850         call Dneall_m_diag_scal(0,occ1,hml)
851         eorbit = Dneall_m_trace(0,hml)
852         call Dneall_m_diag_scal_inv(0,occ1,hml)
853      else
854         eorbit = Dneall_m_trace(0,hml)
855      end if
856      if (ispin.eq.1) eorbit = eorbit+eorbit
857
858
859
860*     **** get ewald energy ****
861      eion = 0.0d0
862      if (control_version().eq.3) eion = ewald_e()
863
864*     **** get free-space ion-ion energy ****
865      if (control_version().eq.4) eion = ion_ion_e()
866
867
868
869*     **** get coulomb energy ****
870      if (control_version().eq.3) ehartr = coulomb_e(dcpl_mb(dng(1)))
871      if (control_version().eq.4) then
872         call D3dB_rr_dot(1,dbl_mb(rho(1)),dbl_mb(vc(1)),ehartr)
873         ehartr = 0.5d0*ehartr*dv
874      end if
875
876
877
878*     **** get exchange-correlation energy ****
879      call D3dB_rr_dot(1,dbl_mb(dnall(1)),dbl_mb(xce(1)),exc)
880      call D3dB_rr_dot(1,dn(1,1),dbl_mb(xcp(1)),pxc)
881      if (ispin.eq.1) then
882         exc= exc + exc
883         pxc= pxc + pxc
884      else
885         call D3dB_rr_dot(1,dbl_mb(dnall(1)+n2ft3d),
886     >                      dbl_mb(xce(1)),exc2)
887         call D3dB_rr_dot(1,dn(1,2),dbl_mb(xcp(1)+n2ft3d),pxc2)
888         exc= exc + exc2
889         pxc= pxc + pxc2
890      end if
891      exc = exc*dv
892      pxc = pxc*dv
893
894      if (nwpw_meta_gga_on()) then
895         pxc = pxc + nwpw_meta_gga_pxc(ispin,neq,psi1)
896      end if
897
898
899*     **** velocity and kinetic energy of psi ****
900      if ((.not.psp_pawexist()).or.(.not.nose)) then
901         eke = 0.0d0
902         do i=1,n2(ispin)
903            call Pack_c_SMul1(1,-h,psi0(1,i))
904            call Pack_cc_daxpy(1,h,psi2(1,i),psi0(1,i))
905            call Pack_cc_idot(1,psi0(1,i),psi0(1,i),sum)
906            eke = eke+sum
907         end do
908         if (np.gt.1) call Parallel_SumAll(eke)
909         eke = eke*fmass
910         if (ispin.eq.1) eke = 2.0d0*eke
911      end if
912
913
914
915*     **** total energy ****
916      Eold=E(1)
917      E(2) = eorbit + eion + exc - ehartr - pxc + espring
918      E(3) = eke
919      E(4) = ion_ke()
920      E(5) = eorbit
921      E(6) = ehartr
922      E(7) = exc
923      E(8) = eion
924      E(22) = espring
925
926*     ******** QM/MM energies ******
927      if (pspw_qmmm_found()) then
928         e_lj     = pspw_qmmm_LJ_E()
929         e_q      = pspw_qmmm_Q_E()
930         e_spring = pspw_qmmm_spring_E()
931         E(2)  = E(2) + e_lj + e_q + e_spring
932
933         E(11) = e_lj
934         E(12) = e_q
935         E(13) = e_spring
936
937         E(14) = pspw_qmmm_LJ_Emix()
938         E(14) = E(14) + pspw_qmmm_Q_Emix()
939         call v_local_mm(dcpl_mb(vl(1)))
940         call Pack_cc_dot(0,dcpl_mb(dng(1)),dcpl_mb(vl(1)),elocal)
941         if (control_version().eq.4) then
942            call v_lr_local_mm(dbl_mb(r_grid(1)),dbl_mb(vlr_l(1)))
943            call D3dB_rr_dot(1,dbl_mb(rho(1)),dbl_mb(vlr_l(1)),sum)
944            elocal = elocal + sum*dv
945         end if
946         E(14) = E(14) + elocal
947         E(23) = E(23) + E(14)
948         E(24) = E(24) + E(14)*E(14)
949
950      end if
951
952*     **** get pspw_charge energies ****
953         !psi_1v_field()
954      if (field_exist) then
955         E(19)  = electron_psi_v_field_ave(psi1,dn)
956         E(20)  = pspw_charge_Energy_ion()
957     >          + pspw_Efield_Energy_ion()
958         E(21)  = pspw_charge_Energy_charge()
959         E(1)   = E(1) + E(20) + E(21)
960      end if
961
962*     **** PAW ee terms ****
963      if (psp_pawexist()) then
964
965         ehartree_atom = psp_hartree_atom(ispin,neq,psi1)
966         ecmp_cmp      = psp_hartree_cmp_cmp(ispin)
967         ecmp_pw       = psp_hartree_cmp_pw(ispin,dcpl_mb(dng(1)),dn)
968         !E(40) = ehartree_atom             !*** vcoulomb atom  ***
969         !E(41) = ecmp_cmp                  !*** ncmp-ncmp coulomb energy ***
970         !E(42) = ecmp_pw                   !*** ncmp-pw coulomb energy ***
971
972         call psp_xc_atom(ispin,neq,psi1,exc_atom,pxc_atom)
973         !E(43) = exc_atom                  !*** exc atom  ***
974         !E(44) = pxc_atom                  !*** pxc atom  ***
975
976         E(36) = psp_kinetic_core()        !*** kinetic core  - independent of psi ***
977         E(45) = psp_ion_core()            !*** ion core energy - independent of psi ***
978
979         E(2) = E(2)
980     >        + exc_atom - pxc_atom
981     >        - ehartree_atom - ecmp_cmp - ecmp_pw
982      end if
983
984*     **** AQC Energy ****
985      if (V_APC_on) then
986         E(52) = Eapc
987         E(53) = Papc
988         E(2)  = E(2) + E(52) - E(53)
989      end if
990
991
992*     **** SIC corrections ****
993      if (pspw_SIC()) then
994         call pspw_energy_SIC(ispin,psi_r,ehsic,phsic,exsic,pxsic)
995         E(2) = E(2) + ehsic + exsic
996         E(16) = ehsic
997         E(17) = exsic
998         if (pspw_SIC_relaxed()) then
999            E(2)  = E(2) - phsic - pxsic
1000            E(18) = phsic
1001            E(19) = pxsic
1002         end if
1003      end if
1004
1005*     **** HFX corrections ****
1006      if (pspw_HFX()) then
1007         call pspw_energy_HFX(ispin,psi_r,ehfx,phfx)
1008         E(2) = E(2) + ehfx
1009         E(20) = ehfx
1010         if (pspw_HFX_relaxed()) then
1011            E(2)  = E(2) - phfx
1012            E(21) = phfx
1013         end if
1014      end if
1015
1016*     **** DFT+U terms ****
1017      if (psp_U_psputerm()) then
1018         call psp_U_psputerm_energy(ehfx,phfx)
1019         E(29) =  ehfx
1020         E(30) =  phfx
1021         E(2)  = E(2) + E(29) - E(30)
1022      end if
1023
1024*     **** metadynamics energy ****
1025      if (meta_found()) then
1026         call meta_energypotential(ispin,neq,psi1,E(31),E(32))
1027         E(2)  = E(2) + E(31) - E(32)
1028      end if
1029
1030*     **** dispersion energy ***
1031      if (ion_disp_on()) then
1032         E(33) = ion_disp_energy()
1033         E(2)  = E(2) + E(33)
1034      end if
1035
1036*     **** tamd energy ****
1037      if (tamd_found()) then
1038         call tamd_energypotential(ispin,neq,psi1,E(34),E(35))
1039         E(2)  = E(2) + E(34) - E(35)
1040      end if
1041
1042
1043*     **** Running Sums - Energy and Energy**2 sum ***
1044      E(25) = E(25) + E(2)
1045      E(26) = E(26) + E(2)*E(2)
1046      E(27) = E(27) + E(2)+E(3)+E(4)
1047      E(28) = E(28) + (E(2)+E(3)+E(4))**2
1048
1049*     **** output Forces for Fei ***
1050      if (fei) call fei_output(E(2),dbl_mb(ftest(1)))
1051
1052
1053*     **** Nose thermostat energies ****
1054      if (nose) then
1055        E(9)  = Nose_e_energy()
1056        E(10) = Nose_r_energy()
1057        E(1)  = E(2)+E(3)+E(4)+E(9)+E(10)
1058      else
1059        E(1) = E(2)+E(3)+E(4)
1060      end if
1061
1062
1063*     ******** pressure ******
1064      if (calc_pressure) then
1065
1066c*        ***** average Kohn-Sham v_nonlocal energy ****
1067c        call dcopy(2*npack1*nemaxq,0.0d0,0,Hpsi,1)
1068c        call v_nonlocal(ispin,neq,psi1,Hpsi,
1069c     >                .false.,dbl_mb(ftest(1)),fractional,occ1)
1070c        enlocal = 0.0d0
1071c        do ms=1,ispin
1072c        do n=n1(ms),n2(ms)
1073c         call Pack_cc_idot(1,psi1(1,n),Hpsi(1,n),sum)
1074c         if (fractional) sum=sum*occ1(n)
1075c         enlocal = enlocal - sum
1076c        end do
1077c        end do
1078c        if (np.gt.1) call Parallel_SumAll(enlocal)
1079c        if (ispin.eq.1) enlocal = 2.0d0*enlocal
1080        enlocal = E_vnonlocal(ispin,neq,fractional,occ1)
1081
1082
1083        call cgsd_pressure_stress(ispin,neq,psi1,
1084     >                            dbl_mb(dnall(1)),
1085     >                            dcpl_mb(dng(1)),
1086     >                            dbl_mb(xcp(1)),
1087     >                            enlocal,exc,pxc,
1088     >                            pressure,p1,p2,stress)
1089      end if
1090
1091
1092*      **** write ecce data ****
1093       call ecce_print_module_entry('task car-parrinello')
1094       call ion_ecce()
1095
1096       call ecce_print1('total energy', mt_dbl,     E(1), 1)
1097       call ecce_print1('total kinetic', mt_dbl,    E(3)+E(4), 1)
1098       call ecce_print1('potential energy', mt_dbl, E(2), 1)
1099       call ecce_print1('electron kinetic', mt_dbl, E(3), 1)
1100       call ecce_print1('ion kinetic', mt_dbl,      E(4), 1)
1101       call ecce_print1('time', mt_dbl,      (it_in+it_sum)*dt, 1)
1102
1103       call ecce_print2('total gradient', mt_dbl, dbl_mb(fion(1)),
1104     $        3,3,natmx)
1105       call ecce_print1('gradient norm', mt_dbl, E(1), 1)
1106       call ecce_print1('orbital gradient norm', mt_dbl, E(4), 1)
1107c       call ecce_print1('gradient max', mt_dbl, E(1), 1)
1108       call ecce_print_module_exit('task car-parrinello', 'ok')
1109
1110
1111      call nwpw_timing_end(10)
1112
1113*     **** dealocate MA local variables ****
1114 333  continue
1115      call nwpw_timing_start(12)
1116
1117#ifdef PSI_OMP
1118      value = BA_pop_stack(thrlmbda(2))
1119
1120      call omp_destroy_nest_lock(reduce_lock1)
1121      call omp_destroy_nest_lock(reduce_lock2)
1122      call omp_destroy_nest_lock(reduce_lock3)
1123#endif
1124      value = BA_pop_stack(ftest(2))
1125      value = value.and.BA_pop_stack(fion(2))
1126      value = value.and.BA_pop_stack(dnall(2))
1127      value = value.and.BA_pop_stack(xce(2))
1128      value = value.and.BA_pop_stack(xcp(2))
1129      value = value.and.BA_pop_stack(dng(2))
1130      value = value.and.BA_pop_stack(rho(2))
1131      value = value.and.BA_pop_stack(vl(2))
1132      value = value.and.BA_pop_stack(v_field(2))
1133
1134      if ((control_version().eq.4).or.(field_exist))
1135     >  value = value.and.BA_pop_stack(r_grid(2))
1136
1137      if (control_version().eq.4)
1138     >  value = value.and.BA_pop_stack(vlr_l(2))
1139
1140      value = value.and.BA_pop_stack(vc(2))
1141      value = value.and.BA_pop_stack(tmp2(2))
1142      value = value.and.BA_pop_stack(tmp1(2))
1143      value = value.and.Dneall_m_pop_stack(tmp_L)
1144      if (.not.value)
1145     > call errquit('inner_loop_md:popping stack',1, MA_ERR)
1146
1147      call nwpw_timing_end(12)
1148
1149      return
1150      end
1151
1152