1!
2! Copyright (C) 2001-2015 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!
9!----------------------------------------------------------------------------
10SUBROUTINE force_us( forcenl )
11  !----------------------------------------------------------------------------
12  !! The nonlocal potential contribution to forces.
13  !
14  USE kinds,                ONLY : DP
15  USE control_flags,        ONLY : gamma_only
16  USE cell_base,            ONLY : at, bg, tpiba
17  USE ions_base,            ONLY : nat, ntyp => nsp, ityp
18  USE klist,                ONLY : nks, xk, ngk, igk_k
19  USE gvect,                ONLY : g
20  USE uspp,                 ONLY : nkb, vkb, qq_at, deeq, qq_so, deeq_nc, indv_ijkb0
21  USE uspp_param,           ONLY : upf, nh, nhm
22  USE wvfct,                ONLY : nbnd, npwx, wg, et
23  USE lsda_mod,             ONLY : lsda, current_spin, isk, nspin
24  USE symme,                ONLY : symvector
25  USE wavefunctions,        ONLY : evc
26  USE noncollin_module,     ONLY : npol, noncolin
27  USE spin_orb,             ONLY : lspinorb
28  USE io_files,             ONLY : iunwfc, nwordwfc
29  USE buffers,              ONLY : get_buffer
30  USE becmod,               ONLY : calbec, becp, bec_type, allocate_bec_type, &
31                                   deallocate_bec_type
32  USE mp_pools,             ONLY : inter_pool_comm
33  USE mp_bands,             ONLY : intra_bgrp_comm
34  USE mp,                   ONLY : mp_sum, mp_get_comm_null
35  !
36  IMPLICIT NONE
37  !
38  REAL(DP), INTENT(OUT) :: forcenl(3,nat)
39  !! the nonlocal contribution
40  !
41  ! ... local variables
42  !
43  COMPLEX(DP), ALLOCATABLE :: vkb1(:,:)   ! contains g*|beta>
44  COMPLEX(DP), ALLOCATABLE :: deff_nc(:,:,:,:)
45  REAL(DP), ALLOCATABLE :: deff(:,:,:)
46  TYPE(bec_type) :: dbecp                 ! contains <dbeta|psi>
47  INTEGER    :: npw, ik, ipol, ig, jkb
48  !
49  !
50  forcenl(:,:) = 0.D0
51  !
52  CALL allocate_bec_type( nkb, nbnd, becp, intra_bgrp_comm )
53  CALL allocate_bec_type( nkb, nbnd, dbecp, intra_bgrp_comm )
54  !
55  ALLOCATE( vkb1( npwx, nkb ) )
56  !
57  IF (noncolin) THEN
58     ALLOCATE( deff_nc(nhm,nhm,nat,nspin) )
59  ELSEIF (.NOT. gamma_only ) THEN
60     ALLOCATE( deff(nhm,nhm,nat) )
61  ENDIF
62  !
63  ! ... the forces are a sum over the K points and over the bands
64  !
65  DO ik = 1, nks
66     !
67     IF ( lsda ) current_spin = isk(ik)
68     npw = ngk (ik)
69
70     IF ( nks > 1 ) THEN
71        CALL get_buffer( evc, nwordwfc, iunwfc, ik )
72        IF ( nkb > 0 ) CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
73     ENDIF
74     !
75     CALL calbec( npw, vkb, evc, becp )
76     !
77     DO ipol = 1, 3
78!$omp parallel do collapse(2) private(ig)
79        DO jkb = 1, nkb
80           DO ig = 1, npw
81              vkb1(ig,jkb) = vkb(ig,jkb) * (0.D0,-1.D0) * g(ipol,igk_k(ig,ik))
82           ENDDO
83        ENDDO
84!$omp end parallel do
85        !
86        CALL calbec( npw, vkb1, evc, dbecp )
87        !
88        IF ( gamma_only ) THEN
89           !
90           CALL force_us_gamma( forcenl )
91           !
92        ELSE
93           !
94           CALL force_us_k( forcenl )
95           !
96        ENDIF
97     ENDDO
98  ENDDO
99  !
100  ! ... if sums over bands are parallelized over the band group
101  !
102  IF ( becp%comm /= mp_get_comm_null() ) CALL mp_sum( forcenl, becp%comm )
103  !
104  IF (noncolin) THEN
105     DEALLOCATE( deff_nc )
106  ELSEIF ( .NOT. GAMMA_ONLY) THEN
107     DEALLOCATE( deff )
108  ENDIF
109  !
110  DEALLOCATE( vkb1 )
111  !
112  CALL deallocate_bec_type( dbecp )
113  CALL deallocate_bec_type( becp )
114  !
115  ! ... collect contributions across pools from all k-points
116  !
117  CALL mp_sum( forcenl, inter_pool_comm )
118  !
119  ! ... The total D matrix depends on the ionic position via the
120  ! ... augmentation part \int V_eff Q dr, the term deriving from the
121  ! ... derivative of Q is added in the routine addusforce
122  !
123  CALL addusforce( forcenl )
124  !
125  ! ... Since our summation over k points was only on the irreducible
126  ! ... BZ we have to symmetrize the forces.
127  !
128  CALL symvector( nat, forcenl )
129  !
130  RETURN
131  !
132  CONTAINS
133     !
134     !-----------------------------------------------------------------------
135     SUBROUTINE force_us_gamma( forcenl )
136       !-----------------------------------------------------------------------
137       !! Nonlocal contributiuon. Calculation at gamma.
138       !
139       IMPLICIT NONE
140       !
141       REAL(DP) :: forcenl(3,nat)
142       !! the nonlocal contribution
143       !
144       ! ... local variables
145       !
146       REAL(DP), ALLOCATABLE :: aux(:,:)
147       INTEGER ::  nt, na, ibnd, ibnd_loc, ih, jh, ijkb0 ! counters
148       !
149       ! ... Important notice about parallelization over the band group of processors:
150       ! ... 1) internally, "calbec" parallelises on plane waves over the band group
151       ! ... 2) the results of "calbec" are distributed across processors of the band
152       ! ...    group: the band index of becp, dbecp is distributed
153       ! ... 3) the band group is subsequently used to parallelize over bands
154       !
155       !
156       DO nt = 1, ntyp
157          IF ( nh(nt) == 0 ) CYCLE
158          ALLOCATE( aux(nh(nt),becp%nbnd_loc) )
159          DO na = 1, nat
160             IF ( ityp(na) == nt ) THEN
161                ijkb0 = indv_ijkb0(na)
162                ! this is \sum_j q_{ij} <beta_j|psi>
163                CALL DGEMM( 'N','N', nh(nt), becp%nbnd_loc, nh(nt),        &
164                            1.0_dp, qq_at(1,1,na), nhm, becp%r(ijkb0+1,1), &
165                            nkb, 0.0_dp, aux, nh(nt) )
166                ! multiply by -\epsilon_n
167!$omp parallel do default(shared) private(ibnd_loc,ibnd,ih)
168                DO ih = 1, nh(nt)
169                   DO ibnd_loc = 1, becp%nbnd_loc
170                      ibnd = ibnd_loc + becp%ibnd_begin - 1
171                      aux(ih,ibnd_loc) = - et(ibnd,ik) * aux(ih,ibnd_loc)
172                   ENDDO
173                ENDDO
174!$omp end parallel do
175                ! add  \sum_j d_{ij} <beta_j|psi>
176                CALL DGEMM( 'N','N', nh(nt), becp%nbnd_loc, nh(nt), &
177                            1.0_dp, deeq(1,1,na,current_spin), nhm, &
178                            becp%r(ijkb0+1,1), nkb, 1.0_dp, aux, nh(nt) )
179!$omp parallel do default(shared) private(ibnd_loc,ibnd,ih) reduction(-:forcenl)
180                DO ih = 1, nh(nt)
181                   DO ibnd_loc = 1, becp%nbnd_loc
182                      ibnd = ibnd_loc + becp%ibnd_begin - 1
183                      forcenl(ipol,na) = forcenl(ipol,na) -    &
184                           2.0_dp * tpiba * aux(ih,ibnd_loc) * &
185                           dbecp%r(ijkb0+ih,ibnd_loc) * wg(ibnd,ik)
186                   ENDDO
187                ENDDO
188!$omp end parallel do
189                !
190             ENDIF
191          ENDDO
192          DEALLOCATE( aux )
193       ENDDO
194       !
195     END SUBROUTINE force_us_gamma
196     !
197     !-----------------------------------------------------------------------
198     SUBROUTINE force_us_k( forcenl )
199       !-----------------------------------------------------------------------
200       !! Nonlocal contributiuon. Calculation for k-points.
201       !
202       IMPLICIT NONE
203       !
204       REAL(DP) :: forcenl(3,nat)
205       !! the nonlocal contribution
206       !
207       ! ... local variables
208       !
209       REAL(DP) :: fac
210       INTEGER  :: ibnd, ih, jh, na, nt, ikb, jkb, ijkb0, is, js, ijs !counters
211       !
212       DO ibnd = 1, nbnd
213          !
214          IF (noncolin) THEN
215             CALL compute_deff_nc( deff_nc, et(ibnd,ik) )
216          ELSE
217             CALL compute_deff( deff, et(ibnd,ik) )
218          ENDIF
219          !
220          fac = wg(ibnd,ik)*tpiba
221          !
222          DO nt = 1, ntyp
223             DO na = 1, nat
224                ijkb0 = indv_ijkb0(na)
225                IF ( ityp(na) == nt ) THEN
226                   DO ih = 1, nh(nt)
227                      ikb = ijkb0 + ih
228                      IF (noncolin) THEN
229                         ijs=0
230                         DO is = 1, npol
231                            DO js = 1, npol
232                               ijs=ijs+1
233                               forcenl(ipol,na) = forcenl(ipol,na)- &
234                                    deff_nc(ih,ih,na,ijs)*fac*(     &
235                                    CONJG(dbecp%nc(ikb,is,ibnd))*   &
236                                    becp%nc(ikb,js,ibnd)+           &
237                                    CONJG(becp%nc(ikb,is,ibnd))*    &
238                                    dbecp%nc(ikb,js,ibnd) )
239                            ENDDO
240                         ENDDO
241                      ELSE
242                         forcenl(ipol,na) = forcenl(ipol,na) -   &
243                              2.D0 * fac * deff(ih,ih,na)*       &
244                              DBLE( CONJG( dbecp%k(ikb,ibnd) ) * &
245                              becp%k(ikb,ibnd) )
246                      ENDIF
247                   ENDDO
248                   !
249                   IF ( upf(nt)%tvanp .OR. upf(nt)%is_multiproj ) THEN
250                      DO ih = 1, nh(nt)
251                         ikb = ijkb0 + ih
252                         !
253                         ! ... in US case there is a contribution for jh<>ih.
254                         ! ... We use here the symmetry in the interchange
255                         ! ... of ih and jh
256                         !
257                         DO jh = ( ih + 1 ), nh(nt)
258                            jkb = ijkb0 + jh
259                            IF (noncolin) THEN
260                               ijs=0
261                               DO is = 1, npol
262                                  DO js = 1, npol
263                                     ijs = ijs + 1
264                                     forcenl(ipol,na) = forcenl(ipol,na)- &
265                                          deff_nc(ih,jh,na,ijs)*fac*(     &
266                                          CONJG(dbecp%nc(ikb,is,ibnd))*   &
267                                          becp%nc(jkb,js,ibnd)+           &
268                                          CONJG(becp%nc(ikb,is,ibnd))*    &
269                                          dbecp%nc(jkb,js,ibnd))-         &
270                                          deff_nc(jh,ih,na,ijs)*fac*(     &
271                                          CONJG(dbecp%nc(jkb,is,ibnd))*   &
272                                          becp%nc(ikb,js,ibnd)+           &
273                                          CONJG(becp%nc(jkb,is,ibnd))*    &
274                                          dbecp%nc(ikb,js,ibnd) )
275                                  ENDDO
276                               ENDDO
277                            ELSE
278                               forcenl(ipol,na) = forcenl(ipol,na) -     &
279                                    2.D0 * fac * deff(ih,jh,na) *        &
280                                    DBLE( CONJG( dbecp%k(ikb,ibnd) ) *   &
281                                    becp%k(jkb,ibnd) + dbecp%k(jkb,ibnd) &
282                                    * CONJG( becp%k(ikb,ibnd) ) )
283                            ENDIF
284                         ENDDO !jh
285                      ENDDO !ih
286                   ENDIF ! tvanp
287                   !
288                ENDIF ! ityp(na) == nt
289             ENDDO ! nat
290          ENDDO ! ntyp
291       ENDDO ! nbnd
292       !
293       !
294     END SUBROUTINE force_us_k
295     !
296     !
297END SUBROUTINE force_us
298