1! 2! Copyright (C) 2002-2009 Quantum ESPRESSO group 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8! 9! Written and revised by Carlo Cavazzoni 10! Task Groups parallelization by C. Bekas (IBM Research Zurich). 11! 12!------------------------------------------------------------------------- 13 SUBROUTINE dforce_x ( i, bec, vkb, c, df, da, v, ldv, ispin, f, n, nspin, v1 ) 14!----------------------------------------------------------------------- 15!computes: the generalized force df=cmplx(dfr,dfi,kind=DP) acting on the i-th 16! electron state at the gamma point of the brillouin zone 17! represented by the vector c=cmplx(cr,ci,kind=DP) 18! 19! d_n(g) = f_n { 0.5 g^2 c_n(g) + [vc_n](g) + 20! sum_i,ij d^q_i,ij (-i)**l beta_i,i(g) 21! e^-ig.r_i < beta_i,j | c_n >} 22! 23 USE parallel_include 24 USE kinds, ONLY: dp 25 USE control_flags, ONLY: iprint 26 USE uspp, ONLY: nhsa=>nkb, dvan, deeq, indv_ijkb0 27 USE uspp_param, ONLY: nhm, nh 28 USE constants, ONLY: pi, fpi 29 USE ions_base, ONLY: nsp, na, nat, ityp 30 USE gvecw, ONLY: ngw, g2kin 31 USE cell_base, ONLY: tpiba2 32 USE ensemble_dft, ONLY: tens 33 USE funct, ONLY: dft_is_meta, dft_is_hybrid, exx_is_active 34 USE fft_base, ONLY: dffts 35 USE fft_interfaces, ONLY: fwfft, invfft 36 USE mp_global, ONLY: me_bgrp 37 USE control_flags, ONLY: lwfpbe0nscf 38 USE exx_module, ONLY: exx_potential 39 USE fft_helper_subroutines 40! 41 IMPLICIT NONE 42! 43 INTEGER, INTENT(IN) :: i 44 REAL(DP) :: bec(:,:) 45 COMPLEX(DP) :: vkb(:,:) 46 COMPLEX(DP) :: c(:,:) 47 COMPLEX(DP) :: df(:), da(:) 48 INTEGER, INTENT(IN) :: ldv 49 REAL(DP) :: v( ldv, * ) 50 INTEGER :: ispin( : ) 51 REAL(DP) :: f( : ) 52 INTEGER, INTENT(IN) :: n, nspin 53 REAL(DP), OPTIONAL :: v1( ldv, * ) 54 ! 55 ! local variables 56 ! 57 INTEGER :: iv, jv, ia, is, iss1, iss2, ir, ig, inl, jnl 58 INTEGER :: igno, igrp, ierr 59 INTEGER :: idx, eig_offset, nogrp_ , inc, tg_nr3 60 REAL(DP) :: fi, fip, dd, dv 61 COMPLEX(DP) :: fp, fm, ci 62#if defined(__INTEL_COMPILER) 63#if __INTEL_COMPILER >= 1300 64!dir$ attributes align: 4096 :: af, aa, psi, exx_a, exx_b 65#endif 66#endif 67 REAL(DP), ALLOCATABLE :: af( :, : ), aa( :, : ) 68 COMPLEX(DP), ALLOCATABLE :: psi(:) 69 REAL(DP) :: tmp1, tmp2 ! Lingzhu Kong 70 REAL(DP), ALLOCATABLE :: exx_a(:), exx_b(:) ! Lingzhu Kong 71 ! 72 CALL start_clock( 'dforce' ) 73 ! 74!======================================================================= 75!exx_wf related 76 IF(dft_is_hybrid().AND.exx_is_active()) THEN 77 allocate( exx_a( dffts%nnr ) ); exx_a=0.0_DP 78 allocate( exx_b( dffts%nnr ) ); exx_b=0.0_DP 79 END IF 80!======================================================================= 81 82 nogrp_ = fftx_ntgrp(dffts) 83 ALLOCATE( psi( dffts%nnr_tg ) ) 84 ! 85#if defined(__MPI) 86 87 CALL c2psi_gamma_tg( dffts, psi, c, i, n ) 88 89 CALL invfft('tgWave', psi, dffts) 90 91#else 92 93 CALL c2psi_gamma( dffts, psi, c(:,i), c(:,i+1) ) 94 ! 95 CALL invfft( 'Wave', psi, dffts ) 96 97#endif 98 ! 99 ! the following avoids a potential out-of-bounds error 100 ! 101 IF ( i < n ) THEN 102 iss1 = ispin(i) 103 iss2 = ispin(i+1) 104 ELSE 105 iss1 = ispin(i) 106 iss2 = iss1 107 END IF 108 ! 109 IF( dffts%has_task_groups ) THEN 110 ! 111 CALL tg_get_group_nr3( dffts, tg_nr3 ) 112 ! 113!=============================================================================== 114!exx_wf related 115 IF(dft_is_hybrid().AND.exx_is_active()) THEN 116 !$omp parallel do private(tmp1,tmp2) 117 DO ir = 1, dffts%nr1x*dffts%nr2x*tg_nr3 118 tmp1 = v(ir,iss1) * DBLE( psi(ir) )+exx_potential(ir,i/nogrp_+1) 119 tmp2 = v(ir,iss2) * AIMAG(psi(ir) )+exx_potential(ir,i/nogrp_+2) 120 psi(ir) = CMPLX( tmp1, tmp2, kind=DP) 121 END DO 122 !$omp end parallel do 123 ELSE 124 !$omp parallel do 125 DO ir = 1, dffts%nr1x*dffts%nr2x*tg_nr3 126 psi(ir) = CMPLX ( v(ir,iss1) * DBLE( psi(ir) ), & 127 v(ir,iss2) *AIMAG( psi(ir) ) ,kind=DP) 128 END DO 129 !$omp end parallel do 130 ENDIF 131!=============================================================================== 132 ! 133 ELSE 134 ! 135 IF( PRESENT( v1 ) ) THEN 136!=============================================================================== 137!exx_wf related 138 IF(dft_is_hybrid().AND.exx_is_active()) THEN 139 ! 140 IF ( (mod(n,2).ne.0 ) .and. (i.eq.n) ) THEN 141 ! 142 !$omp parallel do 143 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p 144 exx_a(ir) = exx_potential(ir, i) 145 exx_b(ir) = 0.0_DP 146 END DO 147 !$omp end parallel do 148 ! 149 ELSE 150 ! 151 !$omp parallel do 152 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p 153 exx_a(ir) = exx_potential(ir, i) 154 exx_b(ir) = exx_potential(ir, i+1) 155 END DO 156 !$omp end parallel do 157 ! 158 ENDIF 159 !$omp parallel do private(tmp1,tmp2) 160 DO ir=1,dffts%nnr 161 tmp1 = v(ir,iss1)* DBLE(psi(ir))+exx_a(ir) 162 tmp2 = v1(ir,iss2)*AIMAG(psi(ir))+exx_b(ir) 163 psi(ir)=CMPLX( tmp1, tmp2, kind=DP ) 164 END DO 165 !$omp end parallel do 166 ! 167 ELSE 168 ! 169 !$omp parallel do 170 DO ir=1,dffts%nnr 171 psi(ir)=CMPLX ( v(ir,iss1)* DBLE(psi(ir)), & 172 v1(ir,iss2)*AIMAG(psi(ir)) ,kind=DP) 173 END DO 174 !$omp end parallel do 175 ! 176 ENDIF 177 ELSE 178!=============================================================================== 179!exx_wf related 180 IF(dft_is_hybrid().AND.exx_is_active()) THEN 181 IF ( (mod(n,2).ne.0 ) .and. (i.eq.n) ) THEN 182 !$omp parallel do 183 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p 184 exx_a(ir) = exx_potential(ir, i) 185 exx_b(ir) = 0.0_DP 186 END DO 187 !$omp end parallel do 188 ELSE 189 !$omp parallel do 190 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p 191 exx_a(ir) = exx_potential(ir, i) 192 exx_b(ir) = exx_potential(ir, i+1) 193 END DO 194 !$omp end parallel do 195 ENDIF 196 !$omp parallel do private(tmp1,tmp2) 197 DO ir=1,dffts%nnr 198 tmp1 = v(ir,iss1)* DBLE(psi(ir))+exx_a(ir) 199 tmp2 = v(ir,iss2)*AIMAG(psi(ir))+exx_b(ir) 200 psi(ir)=CMPLX( tmp1, tmp2, kind=DP ) 201 END DO 202 !$omp end parallel do 203!=============================================================================== 204 ELSE 205 !$omp parallel do 206 DO ir=1,dffts%nnr 207 psi(ir)=CMPLX( v(ir,iss1)* DBLE(psi(ir)), & 208 v(ir,iss2)*AIMAG(psi(ir)) ,kind=DP) 209 END DO 210 !$omp end parallel do 211 ENDIF 212 END IF 213 ! 214 END IF 215 ! 216#if defined(__MPI) 217 CALL fwfft( 'tgWave', psi, dffts ) 218#else 219 CALL fwfft( 'Wave', psi, dffts ) 220#endif 221 ! 222 ! note : the factor 0.5 appears 223 ! in the kinetic energy because it is defined as 0.5*g**2 224 ! in the potential part because of the logics 225 ! 226 ! Each processor will treat its own part of the eigenstate 227 ! assigned to its ORBITAL group 228 ! 229 eig_offset = 0 230 CALL tg_get_recip_inc( dffts, inc ) 231 igno = 1 232 233 DO idx = 1, 2*nogrp_ , 2 234 235 IF( idx + i - 1 <= n ) THEN 236 if (tens) then 237 fi = -0.5d0 238 fip = -0.5d0 239 else 240 fi = -0.5d0*f(i+idx-1) 241 fip = -0.5d0*f(i+idx) 242 endif 243 CALL fftx_psi2c_gamma( dffts, psi(eig_offset+1:eig_offset+inc), & 244 df(igno:igno+ngw-1), da(igno:igno+ngw-1)) 245 IF( dffts%has_task_groups ) THEN 246 DO ig=1,ngw 247 df(ig+igno-1)= fi *(tpiba2 * g2kin(ig) * c(ig,idx+i-1) + df(ig+igno-1)) 248 da(ig+igno-1)= fip*(tpiba2 * g2kin(ig) * c(ig,idx+i ) + da(ig+igno-1)) 249 END DO 250 ELSE 251 DO ig=1,ngw 252 df(ig)= fi*(tpiba2*g2kin(ig)* c(ig,idx+i-1)+df(ig)) 253 da(ig)=fip*(tpiba2*g2kin(ig)* c(ig,idx+i )+da(ig)) 254 END DO 255 END IF 256 END IF 257 258 igno = igno + ngw 259 eig_offset = eig_offset + inc 260 261 ! We take into account the number of elements received from other members of the orbital group 262 263 ENDDO 264 265 ! 266 IF(dft_is_meta()) THEN 267 ! HK/MCA : warning on task groups 268 if (nogrp_.gt.1) call errore('forces','metagga force not supporting taskgroup parallelization',1) 269 ! HK/MCA : reset occupation numbers since omp private screws it up... need a better fix FIXME 270 if (tens) then 271 fi = -0.5d0 272 fip = -0.5d0 273 else 274 fi = -0.5d0*f(i) 275 fip = -0.5d0*f(i+1) 276 endif 277 CALL dforce_meta(c(1,i),c(1,i+1),df,da,psi,iss1,iss2,fi,fip) !METAGGA 278 END IF 279 280 281 IF( nhsa > 0 ) THEN 282 ! 283 ! aa_i,i,n = sum_j d_i,ij <beta_i,j|c_n> 284 ! 285 ALLOCATE( af( nhsa, nogrp_ ), aa( nhsa, nogrp_ ) ) 286 287 af = 0.0d0 288 aa = 0.0d0 289 ! 290 igrp = 1 291 292 DO idx = 1, 2*nogrp_ , 2 293 294 IF( idx + i - 1 <= n ) THEN 295 296 IF (tens) THEN 297 fi = 1.0d0 298 fip= 1.0d0 299 ELSE 300 fi = f(i+idx-1) 301 fip= f(i+idx) 302 END IF 303 ! 304 DO ia = 1, nat 305 is = ityp(ia) 306 DO iv = 1, nh(is) 307 DO jv = 1, nh(is) 308 dv = dvan(iv,jv,is) 309 inl = indv_ijkb0(ia) + iv 310 jnl = indv_ijkb0(ia) + jv 311 IF( i + idx - 1 /= n ) THEN 312 dd = deeq(iv,jv,ia,iss1) + dv 313 af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1) 314 dd = deeq(iv,jv,ia,iss2) + dv 315 aa(inl,igrp) = aa(inl,igrp) - fip * dd * bec(jnl,i+idx) 316 ELSE 317 dd = deeq(iv,jv,ia,iss1) + dv 318 af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1) 319 END IF 320 END DO 321 END DO 322 END DO 323 324 END IF 325 326 igrp = igrp + 1 327 328 END DO 329 330 IF( ngw > 0 ) THEN 331 CALL dgemm ( 'N', 'N', 2*ngw, nogrp_ , nhsa, 1.0d0, vkb, 2*ngw, af, nhsa, 1.0d0, df, 2*ngw) 332 CALL dgemm ( 'N', 'N', 2*ngw, nogrp_ , nhsa, 1.0d0, vkb, 2*ngw, aa, nhsa, 1.0d0, da, 2*ngw) 333 END IF 334 ! 335 DEALLOCATE( aa, af ) 336 ! 337 ENDIF 338! 339 IF(dft_is_hybrid().AND.exx_is_active()) DEALLOCATE(exx_a, exx_b) 340 DEALLOCATE( psi ) 341! 342 CALL stop_clock( 'dforce' ) 343! 344 RETURN 345 END SUBROUTINE dforce_x 346