1!
2! Copyright (C) 2002-2009 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!
9! Written and revised by Carlo Cavazzoni
10! Task Groups parallelization by C. Bekas (IBM Research Zurich).
11!
12!-------------------------------------------------------------------------
13      SUBROUTINE dforce_x ( i, bec, vkb, c, df, da, v, ldv, ispin, f, n, nspin, v1 )
14!-----------------------------------------------------------------------
15!computes: the generalized force df=cmplx(dfr,dfi,kind=DP) acting on the i-th
16!          electron state at the gamma point of the brillouin zone
17!          represented by the vector c=cmplx(cr,ci,kind=DP)
18!
19!     d_n(g) = f_n { 0.5 g^2 c_n(g) + [vc_n](g) +
20!              sum_i,ij d^q_i,ij (-i)**l beta_i,i(g)
21!                                 e^-ig.r_i < beta_i,j | c_n >}
22!
23      USE parallel_include
24      USE kinds,                  ONLY: dp
25      USE control_flags,          ONLY: iprint
26      USE uspp,                   ONLY: nhsa=>nkb, dvan, deeq, indv_ijkb0
27      USE uspp_param,             ONLY: nhm, nh
28      USE constants,              ONLY: pi, fpi
29      USE ions_base,              ONLY: nsp, na, nat, ityp
30      USE gvecw,                  ONLY: ngw, g2kin
31      USE cell_base,              ONLY: tpiba2
32      USE ensemble_dft,           ONLY: tens
33      USE funct,                  ONLY: dft_is_meta, dft_is_hybrid, exx_is_active
34      USE fft_base,               ONLY: dffts
35      USE fft_interfaces,         ONLY: fwfft, invfft
36      USE mp_global,              ONLY: me_bgrp
37      USE control_flags,          ONLY: lwfpbe0nscf
38      USE exx_module,             ONLY: exx_potential
39      USE fft_helper_subroutines
40!
41      IMPLICIT NONE
42!
43      INTEGER,     INTENT(IN)    :: i
44      REAL(DP)                   :: bec(:,:)
45      COMPLEX(DP)                :: vkb(:,:)
46      COMPLEX(DP)                :: c(:,:)
47      COMPLEX(DP)                :: df(:), da(:)
48      INTEGER,     INTENT(IN)    :: ldv
49      REAL(DP)                   :: v( ldv, * )
50      INTEGER                    :: ispin( : )
51      REAL(DP)                   :: f( : )
52      INTEGER,     INTENT(IN)    :: n, nspin
53      REAL(DP),    OPTIONAL      :: v1( ldv, * )
54      !
55      ! local variables
56      !
57      INTEGER     :: iv, jv, ia, is, iss1, iss2, ir, ig, inl, jnl
58      INTEGER     :: igno, igrp, ierr
59      INTEGER     :: idx, eig_offset, nogrp_ , inc, tg_nr3
60      REAL(DP)    :: fi, fip, dd, dv
61      COMPLEX(DP) :: fp, fm, ci
62#if defined(__INTEL_COMPILER)
63#if __INTEL_COMPILER  >= 1300
64!dir$ attributes align: 4096 :: af, aa, psi, exx_a, exx_b
65#endif
66#endif
67      REAL(DP),    ALLOCATABLE :: af( :, : ), aa( :, : )
68      COMPLEX(DP), ALLOCATABLE :: psi(:)
69      REAL(DP)    :: tmp1, tmp2                      ! Lingzhu Kong
70      REAL(DP),    ALLOCATABLE :: exx_a(:), exx_b(:) ! Lingzhu Kong
71      !
72      CALL start_clock( 'dforce' )
73      !
74!=======================================================================
75!exx_wf related
76      IF(dft_is_hybrid().AND.exx_is_active()) THEN
77         allocate( exx_a( dffts%nnr ) ); exx_a=0.0_DP
78         allocate( exx_b( dffts%nnr ) ); exx_b=0.0_DP
79      END IF
80!=======================================================================
81
82      nogrp_ = fftx_ntgrp(dffts)
83      ALLOCATE( psi( dffts%nnr_tg ) )
84      !
85#if defined(__MPI)
86
87      CALL c2psi_gamma_tg( dffts, psi, c, i, n )
88
89      CALL invfft('tgWave', psi, dffts)
90
91#else
92
93      CALL c2psi_gamma( dffts, psi, c(:,i), c(:,i+1) )
94      !
95      CALL invfft( 'Wave', psi, dffts )
96
97#endif
98      !
99      ! the following avoids a potential out-of-bounds error
100      !
101      IF ( i < n ) THEN
102         iss1 = ispin(i)
103         iss2 = ispin(i+1)
104      ELSE
105         iss1 = ispin(i)
106         iss2 = iss1
107      END IF
108      !
109      IF( dffts%has_task_groups ) THEN
110         !
111         CALL tg_get_group_nr3( dffts, tg_nr3 )
112         !
113!===============================================================================
114!exx_wf related
115         IF(dft_is_hybrid().AND.exx_is_active()) THEN
116            !$omp parallel do private(tmp1,tmp2)
117            DO ir = 1, dffts%nr1x*dffts%nr2x*tg_nr3
118               tmp1 = v(ir,iss1) * DBLE( psi(ir) )+exx_potential(ir,i/nogrp_+1)
119               tmp2 = v(ir,iss2) * AIMAG(psi(ir) )+exx_potential(ir,i/nogrp_+2)
120               psi(ir) = CMPLX( tmp1, tmp2, kind=DP)
121            END DO
122            !$omp end parallel do
123         ELSE
124            !$omp parallel do
125            DO ir = 1, dffts%nr1x*dffts%nr2x*tg_nr3
126               psi(ir) = CMPLX ( v(ir,iss1) * DBLE( psi(ir) ), &
127                                 v(ir,iss2) *AIMAG( psi(ir) ) ,kind=DP)
128            END DO
129            !$omp end parallel do
130         ENDIF
131!===============================================================================
132         !
133      ELSE
134         !
135         IF( PRESENT( v1 ) ) THEN
136!===============================================================================
137!exx_wf related
138            IF(dft_is_hybrid().AND.exx_is_active()) THEN
139               !
140               IF ( (mod(n,2).ne.0 ) .and. (i.eq.n) ) THEN
141                 !
142                 !$omp parallel do
143                 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p
144                   exx_a(ir) = exx_potential(ir, i)
145                   exx_b(ir) = 0.0_DP
146                 END DO
147                 !$omp end parallel do
148                 !
149               ELSE
150                 !
151                 !$omp parallel do
152                 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p
153                   exx_a(ir) = exx_potential(ir, i)
154                   exx_b(ir) = exx_potential(ir, i+1)
155                 END DO
156                 !$omp end parallel do
157                 !
158               ENDIF
159               !$omp parallel do private(tmp1,tmp2)
160               DO ir=1,dffts%nnr
161                  tmp1 =  v(ir,iss1)* DBLE(psi(ir))+exx_a(ir)
162                  tmp2 = v1(ir,iss2)*AIMAG(psi(ir))+exx_b(ir)
163                  psi(ir)=CMPLX( tmp1, tmp2, kind=DP )
164               END DO
165               !$omp end parallel do
166               !
167            ELSE
168               !
169               !$omp parallel do
170               DO ir=1,dffts%nnr
171                  psi(ir)=CMPLX ( v(ir,iss1)* DBLE(psi(ir)), &
172                                 v1(ir,iss2)*AIMAG(psi(ir)) ,kind=DP)
173               END DO
174               !$omp end parallel do
175               !
176            ENDIF
177         ELSE
178!===============================================================================
179!exx_wf related
180            IF(dft_is_hybrid().AND.exx_is_active()) THEN
181               IF ( (mod(n,2).ne.0 ) .and. (i.eq.n) ) THEN
182                 !$omp parallel do
183                 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p
184                   exx_a(ir) = exx_potential(ir, i)
185                   exx_b(ir) = 0.0_DP
186                 END DO
187                 !$omp end parallel do
188               ELSE
189                 !$omp parallel do
190                 DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%my_nr3p
191                   exx_a(ir) = exx_potential(ir, i)
192                   exx_b(ir) = exx_potential(ir, i+1)
193                 END DO
194                 !$omp end parallel do
195               ENDIF
196               !$omp parallel do private(tmp1,tmp2)
197               DO ir=1,dffts%nnr
198                  tmp1 = v(ir,iss1)* DBLE(psi(ir))+exx_a(ir)
199                  tmp2 = v(ir,iss2)*AIMAG(psi(ir))+exx_b(ir)
200                  psi(ir)=CMPLX( tmp1, tmp2, kind=DP )
201               END DO
202               !$omp end parallel do
203!===============================================================================
204            ELSE
205               !$omp parallel do
206               DO ir=1,dffts%nnr
207                  psi(ir)=CMPLX( v(ir,iss1)* DBLE(psi(ir)), &
208                                 v(ir,iss2)*AIMAG(psi(ir)) ,kind=DP)
209               END DO
210               !$omp end parallel do
211            ENDIF
212         END IF
213         !
214      END IF
215      !
216#if defined(__MPI)
217      CALL fwfft( 'tgWave', psi, dffts )
218#else
219      CALL fwfft( 'Wave', psi, dffts )
220#endif
221      !
222      !   note : the factor 0.5 appears
223      !       in the kinetic energy because it is defined as 0.5*g**2
224      !       in the potential part because of the logics
225      !
226      !   Each processor will treat its own part of the eigenstate
227      !   assigned to its ORBITAL group
228      !
229      eig_offset = 0
230      CALL tg_get_recip_inc( dffts, inc )
231      igno = 1
232
233      DO idx = 1, 2*nogrp_ , 2
234
235         IF( idx + i - 1 <= n ) THEN
236            if (tens) then
237               fi = -0.5d0
238               fip = -0.5d0
239            else
240               fi = -0.5d0*f(i+idx-1)
241               fip = -0.5d0*f(i+idx)
242            endif
243            CALL fftx_psi2c_gamma( dffts, psi(eig_offset+1:eig_offset+inc), &
244                                   df(igno:igno+ngw-1), da(igno:igno+ngw-1))
245            IF( dffts%has_task_groups ) THEN
246               DO ig=1,ngw
247                  df(ig+igno-1)= fi *(tpiba2 * g2kin(ig) * c(ig,idx+i-1) + df(ig+igno-1))
248                  da(ig+igno-1)= fip*(tpiba2 * g2kin(ig) * c(ig,idx+i  ) + da(ig+igno-1))
249               END DO
250            ELSE
251               DO ig=1,ngw
252                  df(ig)= fi*(tpiba2*g2kin(ig)* c(ig,idx+i-1)+df(ig))
253                  da(ig)=fip*(tpiba2*g2kin(ig)* c(ig,idx+i  )+da(ig))
254               END DO
255            END IF
256         END IF
257
258         igno = igno + ngw
259         eig_offset = eig_offset + inc
260
261         ! We take into account the number of elements received from other members of the orbital group
262
263      ENDDO
264
265      !
266      IF(dft_is_meta()) THEN
267         ! HK/MCA : warning on task groups
268         if (nogrp_.gt.1) call errore('forces','metagga force not supporting taskgroup parallelization',1)
269         ! HK/MCA : reset occupation numbers since omp private screws it up... need a better fix FIXME
270         if (tens) then
271            fi = -0.5d0
272            fip = -0.5d0
273         else
274            fi = -0.5d0*f(i)
275            fip = -0.5d0*f(i+1)
276         endif
277         CALL dforce_meta(c(1,i),c(1,i+1),df,da,psi,iss1,iss2,fi,fip) !METAGGA
278      END IF
279
280
281      IF( nhsa > 0 ) THEN
282         !
283         !     aa_i,i,n = sum_j d_i,ij <beta_i,j|c_n>
284         !
285         ALLOCATE( af( nhsa, nogrp_ ), aa( nhsa, nogrp_ ) )
286
287         af = 0.0d0
288         aa = 0.0d0
289         !
290         igrp = 1
291
292         DO idx = 1, 2*nogrp_ , 2
293
294            IF( idx + i - 1 <= n ) THEN
295
296               IF (tens) THEN
297                  fi = 1.0d0
298                  fip= 1.0d0
299               ELSE
300                  fi = f(i+idx-1)
301                  fip= f(i+idx)
302               END IF
303               !
304               DO ia = 1, nat
305                  is = ityp(ia)
306                  DO iv = 1, nh(is)
307                     DO jv = 1, nh(is)
308                        dv = dvan(iv,jv,is)
309                        inl = indv_ijkb0(ia) + iv
310                        jnl = indv_ijkb0(ia) + jv
311                        IF( i + idx - 1 /= n ) THEN
312                           dd = deeq(iv,jv,ia,iss1) + dv
313                           af(inl,igrp) = af(inl,igrp) - fi  * dd * bec(jnl,i+idx-1)
314                           dd = deeq(iv,jv,ia,iss2) + dv
315                           aa(inl,igrp) = aa(inl,igrp) - fip * dd * bec(jnl,i+idx)
316                        ELSE
317                           dd = deeq(iv,jv,ia,iss1) + dv
318                           af(inl,igrp) = af(inl,igrp) - fi * dd * bec(jnl,i+idx-1)
319                        END IF
320                     END DO
321                  END DO
322               END DO
323
324            END IF
325
326            igrp = igrp + 1
327
328         END DO
329
330         IF( ngw > 0 ) THEN
331           CALL dgemm ( 'N', 'N', 2*ngw, nogrp_ , nhsa, 1.0d0, vkb, 2*ngw, af, nhsa, 1.0d0, df, 2*ngw)
332           CALL dgemm ( 'N', 'N', 2*ngw, nogrp_ , nhsa, 1.0d0, vkb, 2*ngw, aa, nhsa, 1.0d0, da, 2*ngw)
333         END IF
334         !
335         DEALLOCATE( aa, af )
336         !
337      ENDIF
338!
339      IF(dft_is_hybrid().AND.exx_is_active()) DEALLOCATE(exx_a, exx_b)
340      DEALLOCATE( psi )
341!
342      CALL stop_clock( 'dforce' )
343!
344      RETURN
345   END SUBROUTINE dforce_x
346