1* 2* $Id$ 3* 4* ************************************************************ 5* * MPI cpmd routine * 6* * * 7* * This is a developing cpmdv3 parallel code wrtten in * 8* * Fortran and MPI. * 9* * * 10* * + mpl message passing library used * 11* * * 12* * + ngp is used instead of nfft in this proceudure * 13* * * 14* * + error checking is based on aimd.h parameters * 15* * then control file * 16* ************************************************************ 17 18#ifdef USE_OPENMP 19#define PSI_OMP 20#endif 21 22 subroutine inner_loop_md(verlet,sa_alpha, 23 > ispin,ne,neq, 24 > npack1,nfft3d,nemaxq, 25 > psi0,psi1,psi2,dn, 26 > it_in,it_sum,E, 27 > hml,lmd,lmd1, 28 > Hpsi,psi_r, 29 > calc_pressure,pressure,p1,p2, 30 > fractional,occ0,occ1,occ2) 31#ifdef PSI_OMP 32 use omp_lib 33#endif 34 implicit none 35 logical verlet 36 real*8 sa_alpha(2) 37 integer ispin,ne(2),neq(2) 38 integer npack1,nfft3d,nemaxq 39 complex*16 psi0(npack1,nemaxq) 40 complex*16 psi1(npack1,nemaxq) 41 complex*16 psi2(npack1,nemaxq) 42 real*8 dn(2*nfft3d,2) 43 integer it_in,it_sum 44 real*8 E(*) 45 real*8 hml(*),lmd(*),lmd1(*) 46 47* **** very big workspace variables **** 48 complex*16 Hpsi(npack1,nemaxq) 49 real*8 psi_r(2*nfft3d,nemaxq) 50 51 logical calc_pressure 52 real*8 pressure,p1,p2,stress(3,3) 53 54 logical fractional 55 real*8 occ0(*),occ1(*),occ2(*) 56 57#include "bafdecls.fh" 58#include "errquit.fh" 59ccccccc#include "frac_occ.fh" 60 61 62* **** local variables **** 63 logical move,fei 64 integer n2ft3d,np,np_i,np_j,taskid 65 integer i,j,ii,jj,n,n1(2),n2(2),it,ms,nn,ierr,gga 66 integer nx,ny,nz 67 integer index,indext 68 real*8 sum,Eold,eorbit,eion,ehartr,eke,eki,sse,ssr,sa1,sa2 69 real*8 exc,exc2,pxc,pxc2,dte,dte0,scal1,scal2,dv,dt,fmass,h 70 real*8 ehsic,phsic,exsic,pxsic,ehfx,phfx,espring,enlocal,elocal 71 !real*8 e_ionmm,e_qmmm,e_mmmm,e_pol,e_vib,e_cav 72 real*8 e_lj,e_q,e_spring,Eapc,Papc 73 real*8 ehartree_atom,ecmp_cmp,ecmp_pw,exc_atom,pxc_atom 74 real*8 s,r 75 integer tid,nthreads 76 77 78* **** MA local variables **** 79 logical value,nose,field_exist,sic,allow_translation 80 logical V_APC_on 81* real*8 tmp_L(8*nemax*nemax) 82* complex*16 tmp1(nfft3d) 83* complex*16 tmp2(nfft3d) 84c complex*16 vl(nfft3d) 85c complex*16 vc(nfft3d) 86c complex*16 dng(nfft3d) 87c real*8 xcp(2*nfft3d,2) 88c real*8 xce(2*nfft3d,2) 89c real*8 fion(3,natmx) 90 integer tmp_L(2) 91 integer tmp1(2),tmp2(2) 92 integer vl(2),vc(2),dng(2) 93 integer rho(2),vlr_l(2),r_grid(2) 94 integer xcp(2),xce(2),dnall(2) 95 integer v_field(2) 96 integer natmx,fion(2),ftest(2) 97 integer npack0,nion1 98 99* ***** external functions **** 100 integer ion_nion,control_gga 101 real*8 ion_ke,ion_ion_e,E_vnonlocal 102 real*8 control_time_step,control_fake_mass,ion_dti 103 real*8 lattice_omega,coulomb_e,ewald_e 104 external ion_nion,control_gga 105 external ion_ke,ion_ion_e,E_vnonlocal 106 external control_time_step,control_fake_mass,ion_dti 107 external lattice_omega,coulomb_e,ewald_e 108 logical psp_semicore 109 external psp_semicore 110 integer control_version 111 external control_version 112 logical control_Nose,control_Fei 113 external control_Nose,control_Fei 114 real*8 Nose_e_energy,Nose_r_energy,Nose_sse,Nose_ssr 115 real*8 Nose_dXe,Nose_dXr 116 external Nose_e_energy,Nose_r_energy,Nose_sse,Nose_ssr 117 external Nose_dXe,Nose_dXr 118 119* ***** QM/MM external functions **** 120 logical pspw_qmmm_found 121 real*8 pspw_qmmm_LJ_E,pspw_qmmm_Q_E,pspw_qmmm_spring_E 122 real*8 pspw_qmmm_LJ_Emix,pspw_qmmm_Q_Emix 123 external pspw_qmmm_found 124 external pspw_qmmm_LJ_E,pspw_qmmm_Q_E,pspw_qmmm_spring_E 125 external pspw_qmmm_LJ_Emix,pspw_qmmm_Q_Emix 126 127* ***** pspw_charge external functions **** 128 logical pspw_charge_found,pspw_bqext 129 external pspw_charge_found,pspw_bqext 130 integer pspw_charge_nion 131 external pspw_charge_nion 132 real*8 pspw_charge_Energy_ion,pspw_charge_Energy_charge 133 external pspw_charge_Energy_ion,pspw_charge_Energy_charge 134 real*8 electron_psi_v_field_ave 135 external electron_psi_v_field_ave 136 logical pspw_Efield_found 137 external pspw_Efield_found 138 real*8 pspw_Efield_Energy_ion 139 external pspw_Efield_Energy_ion 140 141 142 logical dplot_iteration_check 143 external dplot_iteration_check 144 145 logical pspw_SIC,pspw_SIC_relaxed,control_allow_translation 146 logical pspw_HFX,pspw_HFX_relaxed 147 external pspw_SIC,pspw_SIC_relaxed,control_allow_translation 148 external pspw_HFX,pspw_HFX_relaxed 149 150 double precision Dneall_m_trace 151 external Dneall_m_trace 152 logical Dneall_m_push_get_block,Dneall_m_pop_stack 153 external Dneall_m_push_get_block,Dneall_m_pop_stack 154 155 logical meta_found,tamd_found,psp_U_psputerm 156 external meta_found,tamd_found,psp_U_psputerm 157 logical nwpw_meta_gga_on,ion_disp_on 158 external nwpw_meta_gga_on,ion_disp_on 159 real*8 nwpw_meta_gga_pxc,ion_disp_energy 160 external nwpw_meta_gga_pxc,ion_disp_energy 161 162* ***** PAW external functions **** 163 logical psp_pawexist 164 external psp_pawexist 165 real*8 psp_hartree_atom,psp_hartree_cmp_cmp,psp_hartree_cmp_pw 166 external psp_hartree_atom,psp_hartree_cmp_cmp,psp_hartree_cmp_pw 167 real*8 psp_kinetic_core,psp_ion_core 168 external psp_kinetic_core,psp_ion_core 169 integer Parallel_threadid,Parallel_nthreads 170 external Parallel_threadid,Parallel_nthreads 171 172* **** cosmo external functions **** 173 logical pspw_V_APC_on 174 external pspw_V_APC_on 175 176 177#ifdef PSI_OMP 178 integer nida,nidb 179 integer mthr 180 181 real*8 adiff 182 logical notgram 183 184 integer MASTER 185 parameter (MASTER=0) 186 integer Parallel_maxthreads 187 external Parallel_maxthreads 188 integer thrlmbda(2) 189 190 INTEGER(kind=omp_nest_lock_kind) reduce_lock1 191 INTEGER(kind=omp_nest_lock_kind) reduce_lock2 192 INTEGER(kind=omp_nest_lock_kind) reduce_lock3 193 common / reduce_ffm / reduce_lock1,reduce_lock2,reduce_lock3 194 195* **** matrix_blocking common block **** 196 integer mblock(2),nblock(2),algorithm(2) 197 common /matrix_blocking/ mblock,nblock,algorithm 198#endif 199 200 201 202 call Parallel_np(np) 203 call Parallel_taskid(taskid) 204 call Pack_npack(0,npack0) 205 206 n2ft3d = 2*nfft3d 207 field_exist = pspw_charge_found().or.pspw_Efield_found() 208 sic = pspw_SIC() 209 gga = control_gga() 210 allow_translation = control_allow_translation() 211 fei = control_Fei() 212 V_APC_on = pspw_V_APC_on() 213 214* **** allocate MA local variables **** 215 call nwpw_timing_start(12) 216 value = Dneall_m_push_get_block(1,8,tmp_L) 217 value = value.and. 218 > BA_push_get(mt_dcpl,(nfft3d),'tmp1',tmp1(2),tmp1(1)) 219 value = value.and. 220 > BA_push_get(mt_dcpl,(nfft3d),'tmp2',tmp2(2),tmp2(1)) 221 222 if (control_version().eq.3) then 223 value = value.and. 224 > BA_push_get(mt_dcpl,(npack0),'vc', vc(2), vc(1)) 225 end if 226 227 if (control_version().eq.4) then 228 value = value.and. 229 > BA_push_get(mt_dbl,(n2ft3d),'vc',vc(2),vc(1)) 230 231 value = value.and. 232 > BA_push_get(mt_dbl,(n2ft3d),'vlr_l',vlr_l(2),vlr_l(1)) 233 end if 234 235 if ((control_version().eq.4).or.(field_exist)) then 236 value = value.and. 237 > BA_push_get(mt_dbl,(3*n2ft3d),'r_grid',r_grid(2),r_grid(1)) 238 end if 239 240 value = value.and. 241 > BA_push_get(mt_dbl,(n2ft3d),'v_field',v_field(2),v_field(1)) 242 243 value = value.and. 244 > BA_push_get(mt_dcpl,(npack0),'vl', vl(2), vl(1)) 245 value = value.and. 246 > BA_push_get(mt_dbl,(n2ft3d),'rho',rho(2), rho(1)) 247 value = value.and. 248 > BA_push_get(mt_dcpl,(npack0),'dng',dng(2), dng(1)) 249 value = value.and. 250 > BA_push_get(mt_dbl,(4*nfft3d),'xcp',xcp(2), xcp(1)) 251 value = value.and. 252 > BA_push_get(mt_dbl,(4*nfft3d),'xce',xce(2), xce(1)) 253 value = value.and. 254 > BA_push_get(mt_dbl,(4*nfft3d),'dnall',dnall(2),dnall(1)) 255 natmx = ion_nion() 256 if (pspw_charge_found().and. 257 > (.not.pspw_bqext())) natmx = natmx + pspw_charge_nion() 258 value = value.and. 259 > BA_push_get(mt_dbl,(3*natmx),'fion',fion(2),fion(1)) 260 value = value.and. 261 > BA_push_get(mt_dbl,(3*natmx),'ftest',ftest(2),ftest(1)) 262 call Parallel_shared_vector_zero(.false.,3*natmx,dbl_mb(fion(1))) 263 call Parallel_shared_vector_zero(.true.,3*natmx,dbl_mb(ftest(1))) 264 265#ifdef PSI_OMP 266 mthr = Parallel_maxthreads() 267 mthr = max(mthr,4) 268 269 call Parallel_np(np) 270 call Parallel_taskid(taskid) 271 nida = 0 272 if (taskid.eq.MASTER) nida = 1 273 nidb = npack1-nida 274 275 !write(*,*) mthr,ne(1) 276 277c value = value .and. MA_set_auto_verify(.true.) 278 value = value.and. 279 > BA_push_get(mt_dbl,(mthr*ne(1)*ne(1)*8),'thrlmbda', 280 > thrlmbda(2),thrlmbda(1)) 281 282 call omp_init_nest_lock(reduce_lock1) 283 call omp_init_nest_lock(reduce_lock2) 284 call omp_init_nest_lock(reduce_lock3) 285* **** define blocking for dgemm matrix multiply **** 286 algorithm(1) = -1 287 algorithm(2) = -1 288 do ms=1,ispin 289 if (ne(ms).gt.(mthr*np)) then 290 algorithm(ms) = 1 291 call Parallel_matrixblocking(mthr*np,ne(ms),ne(ms), 292 > mblock(ms),nblock(ms)) 293 else if (ne(ms).gt.(mthr)) then 294 algorithm(ms) = 0 295 call Parallel_matrixblocking(mthr,ne(ms),ne(ms), 296 > mblock(ms),nblock(ms)) 297 else 298 algorithm(ms) = -1 299 end if 300 end do 301 302#endif 303 if (.not.value) 304 > call errquit('inner_loop_md:pushing stack',0, MA_ERR) 305 306 call Parallel_shared_vector_zero(.false., 307 > 4*nfft3d,dbl_mb(dnall(1))) 308 call Parallel_shared_vector_zero(.false.,4*nfft3d,dbl_mb(xcp(1))) 309 call Parallel_shared_vector_zero(.true.,4*nfft3d,dbl_mb(xce(1))) 310 311 call nwpw_timing_end(12) 312 313 call D3dB_nx(1,nx) 314 call D3dB_ny(1,ny) 315 call D3dB_nz(1,nz) 316 move = .true. 317 318 nose = control_Nose() 319 sse = 1.0d0 320 ssr = 1.0d0 321 322 n1(1) = 1 323 n2(1) = neq(1) 324 n1(2) = neq(1) + 1 325 n2(2) = neq(1) + neq(2) 326 327 dt = control_time_step() 328 fmass = control_fake_mass() 329 dte = dt*dt/fmass 330 if (.not. verlet) dte=0.5d0*dte 331 h = 1.0d0/(2.0d0*dt) 332 333 sa1 = 1.0d0 334 sa2 = 1.0d0 335 if (.not.nose) then 336 sa1 = 1.0d0/(2.0d0-sa_alpha(1)) 337 sa2 = sa_alpha(1)/(2.0d0-sa_alpha(1)) 338 end if 339 340 scal1 = 1.0d0/dble(nx*ny*nz) 341 scal2 = 1.0d0/lattice_omega() 342 dv = scal1*lattice_omega() 343 344 if ((control_version().eq.4).or.(field_exist)) 345 > call lattice_r_grid(dbl_mb(r_grid(1))) 346 347 espring = 0.0d0 348 349 350* ****************************************** 351* **** **** 352* **** Start of molecular dynamics loop **** 353* **** **** 354* ****************************************** 355!$OMP PARALLEL private(n,i,ms,it,tid,nthreads,r,s,dte0) 356 tid = Parallel_threadid() 357 nthreads = Parallel_nthreads() 358 do it=1,it_in 359 !call dcopy(2*npack1*nemaxq,psi1,1,psi0,1) 360 !call dcopy(2*npack1*nemaxq,psi2,1,psi1,1) 361 call Pack_c_nCopy(1,nemaxq,psi1,psi0) 362 call Pack_c_nCopy(1,nemaxq,psi2,psi1) 363* *** skip ion_shift if newton step *** 364 if (verlet) call ion_shift() 365 if (nose.and.verlet) call Nose_shift() 366 367 368* ******************************** 369* **** generate phaze factors **** 370* ******************************** 371 call phafac() 372 if (control_version().eq.3) call ewald_phafac() 373 if (psp_pawexist()) call nwpw_gintegrals_set(.true.) 374 375 call nwpw_timing_start(11) 376* ******************* 377* **** get psi_r **** 378* ******************* 379c do n=n1(1),n2(ispin) 380c call Pack_c_Copy(1,psi1(1,n),psi_r(1,n)) 381c call Pack_c_unpack(1,psi_r(1,n)) 382c call D3dB_cr_fft3b(1,psi_r(1,n)) 383c call D3dB_r_Zero_Ends(1,psi_r(1,n)) 384c end do 385 386!$OMP DO private(n) 387 do n=n1(1),n2(ispin) 388 call Pack_c_Copy0(1,psi1(1,n),psi_r(1,n)) 389 end do 390!$OMP END DO 391 392 call Grsm_gh_fftb(nfft3d,n2(ispin),psi_r) 393!$OMP BARRIER 394 395!$OMP DO private(n) 396 do n=n1(1),n2(ispin) 397 call D3dB_r_Zero_Ends0(1,psi_r(1,n)) 398 end do 399!$OMP END DO 400 401 call pspw_hfx_localize2_n(3,psi_r,psi0,psi1,psi2) 402 403 call nwpw_meta_gga_gen_tau(ispin,neq,psi1) 404 405 406* ********************* 407* **** generate dn **** 408* ********************* 409 !call dcopy(ispin*n2ft3d,0.0d0,0,dn,1) 410 call Parallel_shared_vector_zero(.true.,ispin*n2ft3d,dn) 411 if (fractional) then 412 do ms=1,ispin 413 do n=n1(ms),n2(ms) 414!$OMP DO private(i) 415 do i=1,n2ft3d 416 dn(i,ms) = dn(i,ms) 417 > + scal2*(psi_r(i,n)**2) 418 > *occ1(n) 419 end do 420!$OMP END DO 421 end do 422 !call D3dB_r_Zero_Ends(1,dn(1,ms)) 423 !call D1dB_Vector_SumAll(n2ft3d,dn(1,ms)) 424 end do 425 else 426 do ms=1,ispin 427 do n=n1(ms),n2(ms) 428!$OMP DO private(i) 429 do i=1,n2ft3d 430 dn(i,ms) = dn(i,ms) + scal2*(psi_r(i,n)**2) 431 end do 432!$OMP END DO 433 end do 434 !call D3dB_r_Zero_Ends(1,dn(1,ms)) 435 !call D1dB_Vector_SumAll(n2ft3d,dn(1,ms)) 436 end do 437 end if 438 call D1dB_Vector_SumAll(ispin*n2ft3d,dn) 439 440 441* ********************** 442* **** generate dng **** 443* ********************** 444 call D3dB_rr_Sum(1,dn(1,1),dn(1,ispin),dbl_mb(rho(1))) 445 call D3dB_r_SMul(1,scal1,dbl_mb(rho(1)),dcpl_mb(tmp1(1))) 446 call D3dB_rc_fft3f(1,dcpl_mb(tmp1(1))) 447 call Pack_c_pack(0,dcpl_mb(tmp1(1))) 448 call Pack_c_Copy(0,dcpl_mb(tmp1(1)),dcpl_mb(dng(1))) 449 450 451* ******************************************************** 452* **** generate dnall - used for semicore corrections **** 453* ******************************************************** 454 if (psp_semicore(0)) then 455 call semicore_density_update() 456 call semicore_density(dcpl_mb(tmp1(1))) 457c call D3dB_r_SMul(1,0.5d0,dcpl_mb(tmp1(1)),dcpl_mb(tmp1(1))) 458 call D3dB_r_SMul1(1,0.5d0,dcpl_mb(tmp1(1))) 459 else 460 !call dcopy(n2ft3d,0.0d0,0,dcpl_mb(tmp1(1)),1) 461 call D3dB_r_Zero(1,dcpl_mb(tmp1(1))) 462 end if 463 do ms=1,ispin 464 call D3dB_rr_Sum(1,dn(1,ms), 465 > dcpl_mb(tmp1(1)), 466 > dbl_mb(dnall(1) +(ms-1)*n2ft3d)) 467 end do 468 call nwpw_timing_end(11) 469 470 471 472 473 474* ***************************************** 475* **** generate local pseudopotential **** 476* **** and also get force if move true **** 477* ***************************************** 478 call v_local(dcpl_mb(vl(1)), 479 > move, 480 > dcpl_mb(dng(1)), 481 > dbl_mb(fion(1))) 482 483 484* *** long-range psp for charge systems *** 485 if (control_version().eq.4) then 486 call v_lr_local(dbl_mb(r_grid(1)), 487 > dbl_mb(vlr_l(1))) 488 if (move) then 489 call grad_v_lr_local(dbl_mb(r_grid(1)), 490 > dbl_mb(rho(1)), 491 > dbl_mb(fion(1))) 492 end if 493 end if 494 495 if (V_APC_on) then 496 call pspw_V_APC(ispin,ne, 497 > dcpl_mb(dng(1)), 498 > dcpl_mb(vl(1)), 499 > Eapc,Papc, 500 > move, 501 > dbl_mb(fion(1))) 502 end if 503 504 505* ************************************ 506* **** generate coulomb potential **** 507* ************************************ 508 if (control_version().eq.3) 509 > call coulomb_v(dcpl_mb(dng(1)),dcpl_mb(vc(1))) 510 511 if (control_version().eq.4) 512 > call coulomb2_v(dbl_mb(rho(1)),dbl_mb(vc(1))) 513 514 515* ************************************************* 516* **** generate exchange-correlation potential **** 517* ************************************************* 518 call v_bwexc_all_tmp1(gga,n2ft3d,ispin, 519 > dbl_mb(dnall(1)), 520 > dbl_mb(xcp(1)), 521 > dbl_mb(xce(1)), 522 > dcpl_mb(tmp1(1))) 523 524 525* ********************************************** 526* **** generate other real-space potentials **** 527* ********************************************** 528 if (field_exist) then 529 530 !call dcopy(n2ft3d,0.0d0,0,dbl_mb(v_field(1)),1) 531 call D3dB_r_Zero(1,dbl_mb(v_field(1))) 532 533* **** generate charge potential **** 534 if (pspw_charge_found()) then 535 call pspw_charge_Generate_V(n2ft3d, 536 > dbl_mb(r_grid(1)), 537 > dbl_mb(v_field(1))) 538 end if 539 540* **** generate Efield potential **** 541 if (pspw_Efield_found()) then 542 call pspw_Efield_Generate_V(n2ft3d, 543 > dbl_mb(r_grid(1)), 544 > dbl_mb(v_field(1))) 545 end if 546 end if 547 548 549 550* ****************** 551* **** get Hpsi **** 552* ****************** 553 if (control_version().eq.3) 554 > call psi_H(ispin,neq,psi1,psi_r, 555 > dcpl_mb(vl(1)), 556 > dbl_mb(v_field(1)),field_exist, 557 > dcpl_mb(vc(1)),dbl_mb(xcp(1)),Hpsi, 558 > move,dbl_mb(fion(1)),fractional,occ1) 559 560 if (control_version().eq.4) 561 > call psi_Hv4(ispin,neq,psi1,psi_r, 562 > dcpl_mb(vl(1)),dbl_mb(vlr_l(1)), 563 > dbl_mb(v_field(1)),field_exist, 564 > dbl_mb(vc(1)),dbl_mb(xcp(1)),Hpsi, 565 > move,dbl_mb(fion(1)),fractional,occ1) 566 567 568 569* ********************** 570* **** get ewald force * 571* ********************** 572* **** get the ewald force **** 573 if (control_version().eq.3) call ewald_f(dbl_mb(fion(1))) 574 575* **** get the free-space ion force **** 576 if (control_version().eq.4) call ion_ion_f(dbl_mb(fion(1))) 577 578 579* ************************ 580* **** get semicoreforce * 581* ************************ 582 if (psp_semicore(0)) then 583 call semicore_xc_F(ispin,dbl_mb(xcp(1)),dbl_mb(fion(1))) 584 end if 585 586* *********************************************** 587* **** get the paw lagrange multiplier force **** 588* *********************************************** 589 if (psp_pawexist()) then 590 if (.not.verlet) then 591 call Dneall_ffm_sym_Multiply(0,psi1,Hpsi,npack1,hml) 592 call Dneall_m_scal(0,(-1.0d0),hml) 593 call Dneall_mm_copy(0,hml,lmd) 594 call Dneall_mm_copy(0,hml,lmd1) 595 end if 596 !tmp_L = 2*lmd - lmda1 597 call Dneall_mm_copy(0,lmd,dbl_mb(tmp_L(1))) 598 call Dneall_m_scal(0,2.0d0,dbl_mb(tmp_L(1))) 599 call Dneall_mm_daxpy(0,-1.0d0,lmd1,dbl_mb(tmp_L(1))) 600 call psp_paw_overlap_fion(ispin, 601 > dbl_mb(tmp_L(1)), 602 > psi1, 603 > dbl_mb(fion(1))) 604 end if 605 606* **************************** 607* **** get the qmmm force **** 608* **************************** 609 if (pspw_qmmm_found()) call pspw_qmmm_fion(dbl_mb(fion(1))) 610 611* ********************************** 612* **** get the dispersion force **** 613* ********************************** 614 if (ion_disp_on()) call ion_disp_force(dbl_mb(fion(1))) 615 616* ****************************************** 617* **** get forces from external charges **** 618* ****************************************** 619 if (pspw_charge_found()) then 620 if(pspw_bqext()) then 621 call pspw_charge_charge_Fion(dbl_mb(fion(1))) 622 else 623 nion1 = ion_nion() 624 call pspw_charge_Fion_Fcharge(dbl_mb(fion(1)), 625 > dbl_mb(fion(1)+3*nion1)) 626 call pspw_charge_Fcharge(dbl_mb(fion(1)+3*nion1)) 627 call pspw_charge_rho_Fcharge(n2ft3d,dbl_mb(r_grid(1)), 628 > dbl_mb(rho(1)), 629 > dv,dbl_mb(fion(1)+3*nion1)) 630 end if 631 end if 632 633* ***************************************** 634* **** get forces from external Efield **** 635* ***************************************** 636 if (pspw_Efield_found()) then 637 call pspw_Efield_Fion(dbl_mb(fion(1))) 638 end if 639 640 641 642* ***************************************** 643* **** remove ion forces using ion_FixIon * 644* ***************************************** 645 if (fei) call Parallel_shared_vector_copy(.false., 646 > 3*natmx,dbl_mb(fion(1)),dbl_mb(ftest(1))) 647 648 call ion_FixIon(dbl_mb(fion(1))) 649 650c if (meta) call meta_force(ispin,neq,psi1,Hpsi,dbl_mb(fion(1))) 651 652 !**** center of mass constraint **** 653c if (.not.allow_translation) then 654c call remove_center_F_mass(dbl_mb(fion(1))) 655c end if 656 657* ************************** 658* **** do a verlet step **** 659* ************************** 660 if (verlet) then 661* **** constant temperature **** 662 if (nose) then 663!$OMP MASTER 664 sse = Nose_sse() 665 ssr = Nose_ssr() 666!$OMP END MASTER 667!$OMP BARRIER 668 do n=1,n2(ispin) 669 call Pack_c_SMul(1,0.5d0*dte,Hpsi(1,n),psi2(1,n)) 670 call Pack_cc_daxpy(1,-1.0d0,psi0(1,n),psi2(1,n)) 671 call Pack_cc_daxpy(1,1.0d0,psi1(1,n),psi2(1,n)) 672c call Pack_c_SMul(1,2.0d0*sse,psi2(1,n),psi2(1,n)) 673 call Pack_c_SMul1(1,2.0d0*sse,psi2(1,n)) 674 call Pack_cc_daxpy(1,1.0d0,psi0(1,n),psi2(1,n)) 675 end do 676 call ion_nose_step(ssr,dbl_mb(fion(1))) 677 678* **** constant energy **** 679 else 680 do n=1,n2(ispin) 681 call Pack_c_SMul(1,dte*sa1,Hpsi(1,n),psi2(1,n)) 682 call Pack_cc_daxpy(1,-1.0d0*sa2,psi0(1,n),psi2(1,n)) 683 call Pack_cc_daxpy(1,2.0d0*sa1,psi1(1,n),psi2(1,n)) 684 end do 685 686* **** QM/MM Verlet update **** 687 call ion_verlet_step(dbl_mb(fion(1)),sa_alpha(2)) 688 end if 689 690* ************************** 691* **** do a newton step **** 692* ************************** 693 else 694 r = 1.0d0 695 s = 1.0d0 696 if (nose) then 697 r = (1.0d0-0.5d0*dt*Nose_dXr()) 698 s = (1.0d0-0.5d0*dt*Nose_dXe()) 699 end if 700 do n=1,n2(ispin) 701 call Pack_c_SMul(1,dte,Hpsi(1,n),psi2(1,n)) 702 call Pack_cc_daxpy(1,s*dt*sa_alpha(1),psi0(1,n),psi2(1,n)) 703c call Pack_cc_Sum(1,psi2(1,n),psi1(1,n),psi2(1,n)) 704 call Pack_cc_Sum2(1,psi1(1,n),psi2(1,n)) 705 end do 706 707* **** QM/MM Newton update **** 708 call ion_newton_step(dbl_mb(fion(1)),sa_alpha(2)*r) 709 710 end if 711 712 713* ***************************************** 714* **** lagrange multiplier corrections **** 715* ***************************************** 716 717 !**** orthoganality constraint **** 718 dte0 = dte 719 if (nose.and.verlet) dte0 = dte*sse 720 721 if (psp_pawexist()) then 722 call psp_overlap_S(ispin,neq,psi1,psi_r) 723 call phafac2() 724 call Dneall_mm_copy(0,lmd,lmd1) 725 call psi_lmbda_paw(ispin,ne,nemaxq,npack1,psi_r,psi2, 726 > dte, 727 > lmd,dbl_mb(tmp_L(1)),ierr) 728 else if (fractional) then 729 call psi_lmbda2(ispin,ne,nemaxq,npack1,psi1,psi2, 730 > dte0,occ1, 731 > lmd,dbl_mb(tmp_L(1)),ierr) 732 else if (sic) then 733 call psi_lmbda_sic(ispin,ne,nemaxq,npack1,psi1,psi2,dte0, 734 > lmd,dbl_mb(tmp_L(1)),ierr) 735 else 736#ifdef PSI_OMP 737! write(*,*) 'T',tid, ':', dbl_mb(thrlmbda(1)) 738 call psi_lmbda_omp(ispin,ne,nemaxq,nida,nidb,psi1,psi2,dte0, 739 > lmd,dbl_mb(tmp_L(1)),ierr,dbl_mb(thrlmbda(1)), 740 > taskid,adiff,notgram) 741#else 742 call psi_lmbda(ispin,ne,nemaxq,npack1,psi1,psi2,dte0, 743 > lmd,dbl_mb(tmp_L(1)),ierr) 744 745#endif 746 end if 747 748 !**** center of mass constraint **** 749 750 !**** total angular momentum constraint **** 751 752 753* ************************** 754* *** update thermostats *** 755* ************************** 756 if (nose) then 757 if (verlet) then 758 if (psp_pawexist()) then 759!$OMP MASTER 760 eke = 0.0d0 761!$OMP END MASTER 762 do i=1,n2(ispin) 763 call Pack_c_SMul1(1,-h,psi0(1,i)) 764 call Pack_cc_daxpy(1,h,psi2(1,i),psi0(1,i)) 765 call Pack_cc_idot(1,psi0(1,i),psi0(1,i),sum) 766!$OMP MASTER 767 eke = eke+sum 768!$OMP END MASTER 769 end do 770 if (np.gt.1) call Parallel_SumAll(eke) 771!$OMP MASTER 772 if (ispin.eq.1) eke = 2.0d0*eke 773 eke = eke*fmass 774!$OMP END MASTER 775 else 776!$OMP MASTER 777 eke = 0.0d0 778!$OMP END MASTER 779 do i=1,n2(ispin) 780 call Pack_cc_idot(1,psi2(1,i),psi0(1,i),sum) 781!$OMP MASTER 782 eke = eke+sum 783!$OMP END MASTER 784 end do 785 if (np.gt.1) call Parallel_SumAll(eke) 786!$OMP MASTER 787 eke = (ne(1)+ne(2) - eke) 788 if (ispin.eq.1) eke = 2.0d0*eke 789 eke = 0.5d0*(fmass/(dt*dt))*eke 790!$OMP END MASTER 791 end if 792!$OMP MASTER 793 eki = ion_ke() 794!$OMP END MASTER 795 call Nose_Verlet_Step(eke,eki) 796 else 797!$OMP MASTER 798 eke = 0.0d0 799!$OMP END MASTER 800 do i=1,n2(ispin) 801 call Pack_cc_idot(1,psi0(1,i),psi0(1,i),sum) 802!$OMP MASTER 803 eke = eke+sum 804!$OMP END MASTER 805 end do 806 if (np.gt.1) call Parallel_SumAll(eke) 807!$OMP MASTER 808 if (ispin.eq.1) eke = 2.0d0*eke 809 eke = eke*fmass 810 eki = ion_ke() 811!$OMP END MASTER 812 call Nose_Newton_Step(eke,eki) 813 end if 814 end if 815 816 817* ******************** 818* **** call dplot **** 819* ******************** 820 if (dplot_iteration_check(it+it_sum)) then 821 call dplot_iteration((it+it_sum),ispin,neq,psi1,dn,psi_r) 822 end if 823 824 825 end do 826* ****************************************************** 827* ***** end main loop ********************************** 828* ****************************************************** 829 830 831* ************************************* 832* ***** total energy calculation ****** 833* ************************************* 834 if (verlet) then 835 call nwpw_timing_start(10) 836 call Parallel2d_np_i(np_i) 837 call Parallel2d_np_j(np_j) 838 if (psp_pawexist()) call phafac() !*** reset phase factors to r1 *** 839 840* *** get orbital energies **** 841 call Dneall_ffm_sym_Multiply(0,psi1,Hpsi,npack1,hml) 842 call Dneall_m_scal(0,(-1.0d0),hml) 843 end if 844!$OMP END PARALLEL 845 846* **** if newton then skip energy calculations **** 847 if (.not. verlet) goto 333 848 849 if (fractional) then 850 call Dneall_m_diag_scal(0,occ1,hml) 851 eorbit = Dneall_m_trace(0,hml) 852 call Dneall_m_diag_scal_inv(0,occ1,hml) 853 else 854 eorbit = Dneall_m_trace(0,hml) 855 end if 856 if (ispin.eq.1) eorbit = eorbit+eorbit 857 858 859 860* **** get ewald energy **** 861 eion = 0.0d0 862 if (control_version().eq.3) eion = ewald_e() 863 864* **** get free-space ion-ion energy **** 865 if (control_version().eq.4) eion = ion_ion_e() 866 867 868 869* **** get coulomb energy **** 870 if (control_version().eq.3) ehartr = coulomb_e(dcpl_mb(dng(1))) 871 if (control_version().eq.4) then 872 call D3dB_rr_dot(1,dbl_mb(rho(1)),dbl_mb(vc(1)),ehartr) 873 ehartr = 0.5d0*ehartr*dv 874 end if 875 876 877 878* **** get exchange-correlation energy **** 879 call D3dB_rr_dot(1,dbl_mb(dnall(1)),dbl_mb(xce(1)),exc) 880 call D3dB_rr_dot(1,dn(1,1),dbl_mb(xcp(1)),pxc) 881 if (ispin.eq.1) then 882 exc= exc + exc 883 pxc= pxc + pxc 884 else 885 call D3dB_rr_dot(1,dbl_mb(dnall(1)+n2ft3d), 886 > dbl_mb(xce(1)),exc2) 887 call D3dB_rr_dot(1,dn(1,2),dbl_mb(xcp(1)+n2ft3d),pxc2) 888 exc= exc + exc2 889 pxc= pxc + pxc2 890 end if 891 exc = exc*dv 892 pxc = pxc*dv 893 894 if (nwpw_meta_gga_on()) then 895 pxc = pxc + nwpw_meta_gga_pxc(ispin,neq,psi1) 896 end if 897 898 899* **** velocity and kinetic energy of psi **** 900 if ((.not.psp_pawexist()).or.(.not.nose)) then 901 eke = 0.0d0 902 do i=1,n2(ispin) 903 call Pack_c_SMul1(1,-h,psi0(1,i)) 904 call Pack_cc_daxpy(1,h,psi2(1,i),psi0(1,i)) 905 call Pack_cc_idot(1,psi0(1,i),psi0(1,i),sum) 906 eke = eke+sum 907 end do 908 if (np.gt.1) call Parallel_SumAll(eke) 909 eke = eke*fmass 910 if (ispin.eq.1) eke = 2.0d0*eke 911 end if 912 913 914 915* **** total energy **** 916 Eold=E(1) 917 E(2) = eorbit + eion + exc - ehartr - pxc + espring 918 E(3) = eke 919 E(4) = ion_ke() 920 E(5) = eorbit 921 E(6) = ehartr 922 E(7) = exc 923 E(8) = eion 924 E(22) = espring 925 926* ******** QM/MM energies ****** 927 if (pspw_qmmm_found()) then 928 e_lj = pspw_qmmm_LJ_E() 929 e_q = pspw_qmmm_Q_E() 930 e_spring = pspw_qmmm_spring_E() 931 E(2) = E(2) + e_lj + e_q + e_spring 932 933 E(11) = e_lj 934 E(12) = e_q 935 E(13) = e_spring 936 937 E(14) = pspw_qmmm_LJ_Emix() 938 E(14) = E(14) + pspw_qmmm_Q_Emix() 939 call v_local_mm(dcpl_mb(vl(1))) 940 call Pack_cc_dot(0,dcpl_mb(dng(1)),dcpl_mb(vl(1)),elocal) 941 if (control_version().eq.4) then 942 call v_lr_local_mm(dbl_mb(r_grid(1)),dbl_mb(vlr_l(1))) 943 call D3dB_rr_dot(1,dbl_mb(rho(1)),dbl_mb(vlr_l(1)),sum) 944 elocal = elocal + sum*dv 945 end if 946 E(14) = E(14) + elocal 947 E(23) = E(23) + E(14) 948 E(24) = E(24) + E(14)*E(14) 949 950 end if 951 952* **** get pspw_charge energies **** 953 !psi_1v_field() 954 if (field_exist) then 955 E(19) = electron_psi_v_field_ave(psi1,dn) 956 E(20) = pspw_charge_Energy_ion() 957 > + pspw_Efield_Energy_ion() 958 E(21) = pspw_charge_Energy_charge() 959 E(1) = E(1) + E(20) + E(21) 960 end if 961 962* **** PAW ee terms **** 963 if (psp_pawexist()) then 964 965 ehartree_atom = psp_hartree_atom(ispin,neq,psi1) 966 ecmp_cmp = psp_hartree_cmp_cmp(ispin) 967 ecmp_pw = psp_hartree_cmp_pw(ispin,dcpl_mb(dng(1)),dn) 968 !E(40) = ehartree_atom !*** vcoulomb atom *** 969 !E(41) = ecmp_cmp !*** ncmp-ncmp coulomb energy *** 970 !E(42) = ecmp_pw !*** ncmp-pw coulomb energy *** 971 972 call psp_xc_atom(ispin,neq,psi1,exc_atom,pxc_atom) 973 !E(43) = exc_atom !*** exc atom *** 974 !E(44) = pxc_atom !*** pxc atom *** 975 976 E(36) = psp_kinetic_core() !*** kinetic core - independent of psi *** 977 E(45) = psp_ion_core() !*** ion core energy - independent of psi *** 978 979 E(2) = E(2) 980 > + exc_atom - pxc_atom 981 > - ehartree_atom - ecmp_cmp - ecmp_pw 982 end if 983 984* **** AQC Energy **** 985 if (V_APC_on) then 986 E(52) = Eapc 987 E(53) = Papc 988 E(2) = E(2) + E(52) - E(53) 989 end if 990 991 992* **** SIC corrections **** 993 if (pspw_SIC()) then 994 call pspw_energy_SIC(ispin,psi_r,ehsic,phsic,exsic,pxsic) 995 E(2) = E(2) + ehsic + exsic 996 E(16) = ehsic 997 E(17) = exsic 998 if (pspw_SIC_relaxed()) then 999 E(2) = E(2) - phsic - pxsic 1000 E(18) = phsic 1001 E(19) = pxsic 1002 end if 1003 end if 1004 1005* **** HFX corrections **** 1006 if (pspw_HFX()) then 1007 call pspw_energy_HFX(ispin,psi_r,ehfx,phfx) 1008 E(2) = E(2) + ehfx 1009 E(20) = ehfx 1010 if (pspw_HFX_relaxed()) then 1011 E(2) = E(2) - phfx 1012 E(21) = phfx 1013 end if 1014 end if 1015 1016* **** DFT+U terms **** 1017 if (psp_U_psputerm()) then 1018 call psp_U_psputerm_energy(ehfx,phfx) 1019 E(29) = ehfx 1020 E(30) = phfx 1021 E(2) = E(2) + E(29) - E(30) 1022 end if 1023 1024* **** metadynamics energy **** 1025 if (meta_found()) then 1026 call meta_energypotential(ispin,neq,psi1,E(31),E(32)) 1027 E(2) = E(2) + E(31) - E(32) 1028 end if 1029 1030* **** dispersion energy *** 1031 if (ion_disp_on()) then 1032 E(33) = ion_disp_energy() 1033 E(2) = E(2) + E(33) 1034 end if 1035 1036* **** tamd energy **** 1037 if (tamd_found()) then 1038 call tamd_energypotential(ispin,neq,psi1,E(34),E(35)) 1039 E(2) = E(2) + E(34) - E(35) 1040 end if 1041 1042 1043* **** Running Sums - Energy and Energy**2 sum *** 1044 E(25) = E(25) + E(2) 1045 E(26) = E(26) + E(2)*E(2) 1046 E(27) = E(27) + E(2)+E(3)+E(4) 1047 E(28) = E(28) + (E(2)+E(3)+E(4))**2 1048 1049* **** output Forces for Fei *** 1050 if (fei) call fei_output(E(2),dbl_mb(ftest(1))) 1051 1052 1053* **** Nose thermostat energies **** 1054 if (nose) then 1055 E(9) = Nose_e_energy() 1056 E(10) = Nose_r_energy() 1057 E(1) = E(2)+E(3)+E(4)+E(9)+E(10) 1058 else 1059 E(1) = E(2)+E(3)+E(4) 1060 end if 1061 1062 1063* ******** pressure ****** 1064 if (calc_pressure) then 1065 1066c* ***** average Kohn-Sham v_nonlocal energy **** 1067c call dcopy(2*npack1*nemaxq,0.0d0,0,Hpsi,1) 1068c call v_nonlocal(ispin,neq,psi1,Hpsi, 1069c > .false.,dbl_mb(ftest(1)),fractional,occ1) 1070c enlocal = 0.0d0 1071c do ms=1,ispin 1072c do n=n1(ms),n2(ms) 1073c call Pack_cc_idot(1,psi1(1,n),Hpsi(1,n),sum) 1074c if (fractional) sum=sum*occ1(n) 1075c enlocal = enlocal - sum 1076c end do 1077c end do 1078c if (np.gt.1) call Parallel_SumAll(enlocal) 1079c if (ispin.eq.1) enlocal = 2.0d0*enlocal 1080 enlocal = E_vnonlocal(ispin,neq,fractional,occ1) 1081 1082 1083 call cgsd_pressure_stress(ispin,neq,psi1, 1084 > dbl_mb(dnall(1)), 1085 > dcpl_mb(dng(1)), 1086 > dbl_mb(xcp(1)), 1087 > enlocal,exc,pxc, 1088 > pressure,p1,p2,stress) 1089 end if 1090 1091 1092* **** write ecce data **** 1093 call ecce_print_module_entry('task car-parrinello') 1094 call ion_ecce() 1095 1096 call ecce_print1('total energy', mt_dbl, E(1), 1) 1097 call ecce_print1('total kinetic', mt_dbl, E(3)+E(4), 1) 1098 call ecce_print1('potential energy', mt_dbl, E(2), 1) 1099 call ecce_print1('electron kinetic', mt_dbl, E(3), 1) 1100 call ecce_print1('ion kinetic', mt_dbl, E(4), 1) 1101 call ecce_print1('time', mt_dbl, (it_in+it_sum)*dt, 1) 1102 1103 call ecce_print2('total gradient', mt_dbl, dbl_mb(fion(1)), 1104 $ 3,3,natmx) 1105 call ecce_print1('gradient norm', mt_dbl, E(1), 1) 1106 call ecce_print1('orbital gradient norm', mt_dbl, E(4), 1) 1107c call ecce_print1('gradient max', mt_dbl, E(1), 1) 1108 call ecce_print_module_exit('task car-parrinello', 'ok') 1109 1110 1111 call nwpw_timing_end(10) 1112 1113* **** dealocate MA local variables **** 1114 333 continue 1115 call nwpw_timing_start(12) 1116 1117#ifdef PSI_OMP 1118 value = BA_pop_stack(thrlmbda(2)) 1119 1120 call omp_destroy_nest_lock(reduce_lock1) 1121 call omp_destroy_nest_lock(reduce_lock2) 1122 call omp_destroy_nest_lock(reduce_lock3) 1123#endif 1124 value = BA_pop_stack(ftest(2)) 1125 value = value.and.BA_pop_stack(fion(2)) 1126 value = value.and.BA_pop_stack(dnall(2)) 1127 value = value.and.BA_pop_stack(xce(2)) 1128 value = value.and.BA_pop_stack(xcp(2)) 1129 value = value.and.BA_pop_stack(dng(2)) 1130 value = value.and.BA_pop_stack(rho(2)) 1131 value = value.and.BA_pop_stack(vl(2)) 1132 value = value.and.BA_pop_stack(v_field(2)) 1133 1134 if ((control_version().eq.4).or.(field_exist)) 1135 > value = value.and.BA_pop_stack(r_grid(2)) 1136 1137 if (control_version().eq.4) 1138 > value = value.and.BA_pop_stack(vlr_l(2)) 1139 1140 value = value.and.BA_pop_stack(vc(2)) 1141 value = value.and.BA_pop_stack(tmp2(2)) 1142 value = value.and.BA_pop_stack(tmp1(2)) 1143 value = value.and.Dneall_m_pop_stack(tmp_L) 1144 if (.not.value) 1145 > call errquit('inner_loop_md:popping stack',1, MA_ERR) 1146 1147 call nwpw_timing_end(12) 1148 1149 return 1150 end 1151 1152