1!
2! Copyright (C) 2002-2007 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8
9!-------------------------------------------------------------------------
10SUBROUTINE gram_bgrp( betae, bec_bgrp, nkbx, cp_bgrp, ngwx )
11!-----------------------------------------------------------------------
12!     gram-schmidt orthogonalization of the set of wavefunctions cp
13!
14      USE uspp,           ONLY : nkb, nkbus
15      USE uspp,           ONLY : qq_nt
16      USE gvecw,          ONLY : ngw
17      USE electrons_base, ONLY : nbspx_bgrp, ibgrp_g2l, nupdwn, iupdwn, nbspx, iupdwn_bgrp, nspin
18      USE kinds,          ONLY : DP
19      USE mp_global,      ONLY : inter_bgrp_comm
20      USE mp,             ONLY : mp_sum
21!
22      IMPLICIT NONE
23!
24      INTEGER, INTENT(IN) :: nkbx, ngwx
25      REAL(DP)      :: bec_bgrp( nkbx, nbspx_bgrp )
26      COMPLEX(DP)   :: cp_bgrp( ngwx, nbspx_bgrp ), betae( ngwx, nkbx )
27!
28      REAL(DP) :: anorm
29      REAL(DP), ALLOCATABLE :: csc( : )
30      COMPLEX(DP), ALLOCATABLE :: ctmp( : )
31      INTEGER :: i,k,j, ig, ibgrp_k, ibgrp_i, nbgrp_im1, iss
32      REAL(DP), PARAMETER :: one  =  1.d0
33      REAL(DP), PARAMETER :: mone = -1.d0
34!
35      CALL start_clock( 'gram' )
36
37      ALLOCATE( csc( nbspx ) )
38      ALLOCATE( ctmp( ngwx ) )
39!
40      DO iss = 1, nspin
41      DO i = iupdwn(iss), iupdwn(iss) + nupdwn(iss) - 1
42         !
43         ibgrp_i = ibgrp_g2l( i )
44         !
45         CALL gracsc_bgrp( bec_bgrp, betae, cp_bgrp, i, csc, iss, nbgrp_im1 )
46         !
47         ! calculate orthogonalized cp(i) : |cp(i)>=|cp(i)>-\sum_k<i csc(k)|cp(k)>
48         !
49         IF( ibgrp_i > 0 ) THEN
50            ctmp = cp_bgrp( :, ibgrp_i )
51         ELSE
52            ctmp = 0.0d0
53         END IF
54         !
55         IF( nbgrp_im1 > 0 .AND. ngw > 0 ) &
56            CALL dgemv( 'N', 2*ngw, nbgrp_im1, mone, cp_bgrp(1,iupdwn_bgrp(iss)), 2*ngwx, csc, 1, one, ctmp, 1 )
57
58         CALL mp_sum( ctmp, inter_bgrp_comm )
59
60         IF( ibgrp_i > 0 ) THEN
61            cp_bgrp( :, ibgrp_i ) = ctmp
62            anorm = cscnorm( bec_bgrp, cp_bgrp, ibgrp_i, nbspx_bgrp )
63            CALL dscal( 2*ngw, 1.0d0/anorm, cp_bgrp(1,ibgrp_i), 1 )
64            CALL dscal( nkbx, 1.0d0/anorm, bec_bgrp(1,ibgrp_i), 1 )
65         END IF
66      END DO
67      END DO
68!
69      DEALLOCATE( ctmp )
70      DEALLOCATE( csc )
71
72      CALL stop_clock( 'gram' )
73!
74      RETURN
75
76CONTAINS
77
78!-----------------------------------------------------------------------
79   FUNCTION cscnorm( bec, cp, i, n )
80!-----------------------------------------------------------------------
81!
82!     Compute the norm of the i-th electronic state = (<c|S|c>)^(1/2)
83!     requires in input the updated bec(i)
84!
85      USE ions_base,          ONLY: nat, ityp
86      USE gvecw,              ONLY: ngw
87      USE gvect,              ONLY: gstart
88      USE uspp_param,         ONLY: nh, upf
89      USE uspp,               ONLY: qq_nt, indv_ijkb0
90      USE mp,                 ONLY: mp_sum
91      USE mp_global,          ONLY: intra_bgrp_comm
92      USE kinds,              ONLY: DP
93!
94      IMPLICIT NONE
95      !
96      INTEGER,     INTENT(IN)  :: i, n
97      REAL(DP),    INTENT(IN) :: bec( :, : )
98      COMPLEX(DP), INTENT(IN) :: cp( :, : )
99      !
100      REAL(DP) :: cscnorm
101      !
102      INTEGER ig, is, iv, jv, ia, inl, jnl
103      REAL(DP) rsum
104      REAL(DP), ALLOCATABLE:: temp(:)
105!
106      ALLOCATE(temp(ngw))
107!
108      DO ig=1,ngw
109         temp(ig)=DBLE(CONJG(cp(ig,i))*cp(ig,i))
110      END DO
111      rsum=2.d0*SUM(temp)
112      IF (gstart == 2) rsum=rsum-temp(1)
113
114      CALL mp_sum( rsum, intra_bgrp_comm )
115!
116      DO ia=1,nat
117         is = ityp(ia)
118         IF( upf(is)%tvanp ) THEN
119            DO iv=1,nh(is)
120               inl=indv_ijkb0(ia) + iv
121               DO jv=1,nh(is)
122                  jnl=indv_ijkb0(ia) + jv
123                  IF(ABS(qq_nt(iv,jv,is)).GT.1.e-5) THEN
124                     rsum = rsum + qq_nt(iv,jv,is)*bec(inl,i)*bec(jnl,i)
125                  ENDIF
126               END DO
127            END DO
128         END IF
129      END DO
130!
131      cscnorm=SQRT(rsum)
132
133      DEALLOCATE(temp)
134!
135      RETURN
136      END FUNCTION cscnorm
137!
138!
139!-------------------------------------------------------------------------
140      SUBROUTINE gracsc_bgrp( bec_bgrp, betae, cp_bgrp, i, csc, iss, nk )
141!-----------------------------------------------------------------------
142!     requires in input the updated bec(k) for k<i
143!     on output: bec(i) is recalculated
144!
145      USE ions_base,      ONLY: na, nat, ityp
146      USE uspp,           ONLY: nkb, nkbus, qq_nt, indv_ijkb0
147      USE uspp_param,     ONLY: nh, upf
148      USE electrons_base, ONLY: ispin, ispin_bgrp, nbspx_bgrp, ibgrp_g2l, iupdwn, nupdwn, nbspx
149      USE gvecw,          ONLY: ngw
150      USE mp,             ONLY: mp_sum
151      USE mp_global,      ONLY: intra_bgrp_comm, inter_bgrp_comm, me_bgrp, nproc_bgrp
152      USE kinds,          ONLY: DP
153      USE gvect, ONLY: gstart
154!
155      IMPLICIT NONE
156!
157      INTEGER, INTENT(IN) :: i, iss
158      INTEGER, INTENT(OUT) :: nk
159      COMPLEX(DP) :: betae( :, : )
160      REAL(DP)    :: bec_bgrp( :, : )
161      COMPLEX(DP) :: cp_bgrp( :, : )
162      REAL(DP)    :: csc( : )
163      INTEGER     :: k, kmax,ig, is, iv, jv, ia, inl, jnl, ibgrp_k, ibgrp_i
164      REAL(DP)    :: rsum
165      REAL(DP), ALLOCATABLE :: temp(:)
166      COMPLEX(DP), ALLOCATABLE :: cp_tmp(:)
167      REAL(DP), ALLOCATABLE :: bec_tmp(:)
168      REAL(DP), ALLOCATABLE :: csc2( : )
169#if defined(_OPENMP)
170      INTEGER :: mytid, ntids, omp_get_thread_num, omp_get_num_threads
171#endif
172      !
173      !     calculate csc(k)=<cp(i)|cp(k)>,  k<i
174      !
175      kmax = i - 1
176      !
177      ALLOCATE( cp_tmp( ngwx ) )
178      ALLOCATE( bec_tmp( nkbx ) )
179      ALLOCATE( csc2( SIZE( csc ) ) )
180
181
182      cp_tmp = 0.0d0
183      csc    = 0.0d0
184
185      ibgrp_i = ibgrp_g2l( i )
186      IF( ibgrp_i > 0 ) cp_tmp = cp_bgrp( :, ibgrp_i )
187
188      CALL mp_sum( cp_tmp, inter_bgrp_comm )
189
190!$omp parallel default(none), &
191!$omp          shared(iupdwn,kmax,ispin,ibgrp_g2l,ngw,cp_bgrp,cp_tmp,csc,betae,bec_bgrp,i,iss,gstart), &
192!$omp          shared(upf,nat,ityp,indv_ijkb0,nh), &
193!$omp          private( temp, k, ig, inl, ibgrp_k, ibgrp_i, is, ia )
194      ALLOCATE( temp( ngw ) )
195!$omp do
196      DO k = iupdwn( iss ), kmax
197         IF ( ispin(i) .EQ. ispin(k) ) THEN
198            ibgrp_k = ibgrp_g2l( k )
199            IF( ibgrp_k > 0 ) THEN
200               DO ig = 1, ngw
201                  temp(ig) = DBLE( cp_bgrp(ig,ibgrp_k) * CONJG(cp_tmp(ig)) )
202               END DO
203               csc(k) = 2.0d0 * SUM(temp)
204               IF (gstart == 2) csc(k) = csc(k) - temp(1)
205            END IF
206         ENDIF
207      END DO
208!$omp end do
209      !
210      !
211      !     calculate bec(i)=<cp(i)|beta>
212      !
213      ibgrp_i = ibgrp_g2l( i )
214      !
215      IF(  ibgrp_i > 0 ) THEN
216!$omp do
217         DO ia = 1, nat
218            is = ityp(ia)
219            IF( upf(is)%tvanp ) THEN
220               DO iv=1,nh(is)
221                  inl=indv_ijkb0(ia)+iv
222                  DO ig=1,ngw
223                     temp(ig) = DBLE( cp_bgrp(ig,ibgrp_i) * CONJG(betae(ig,inl)) )
224                  END DO
225                  bec_bgrp(inl,ibgrp_i)=2.d0*SUM(temp)
226                  IF (gstart == 2) bec_bgrp(inl,ibgrp_i)= bec_bgrp(inl,ibgrp_i)-temp(1)
227               END DO
228            END IF
229         END DO
230!$omp end do
231      END IF
232      DEALLOCATE( temp )
233!$omp end parallel
234
235      CALL mp_sum( csc, intra_bgrp_comm )
236      CALL mp_sum( csc, inter_bgrp_comm )
237
238      IF(  ibgrp_i > 0 ) THEN
239         DO ia = 1, nat
240            is = ityp(ia)
241            IF( upf(is)%tvanp ) THEN
242               inl=indv_ijkb0(ia)
243               CALL mp_sum( bec_bgrp( inl + 1: inl + nh(is), ibgrp_i ), intra_bgrp_comm )
244            END IF
245         END DO
246      END IF
247
248      bec_tmp = 0.0d0
249      IF( ibgrp_i > 0 ) bec_tmp = bec_bgrp(:,ibgrp_i )
250
251      CALL mp_sum( bec_tmp, inter_bgrp_comm )
252!
253!     calculate csc(k)=<cp(i)|S|cp(k)>,  k<i
254!
255      csc2    = 0.0d0
256
257!$omp parallel default(none), &
258!$omp shared(iupdwn,iss,kmax,nproc_bgrp,me_bgrp,ispin,i,ibgrp_g2l,nh), &
259!$omp shared(indv_ijkb0,qq_nt,na,bec_tmp,bec_bgrp,csc2,nat,ityp,upf), &
260!$omp private( k, is, iv, jv, ia, inl, jnl, rsum, ibgrp_k, ntids, mytid )
261#if defined(_OPENMP)
262      mytid = omp_get_thread_num()  ! take the thread ID
263      ntids = omp_get_num_threads() ! take the number of threads
264#endif
265
266      DO k=iupdwn(iss), kmax
267         IF ( MOD( k, nproc_bgrp ) /= me_bgrp ) CYCLE
268#if defined(_OPENMP)
269         ! distribute bands round robin to threads
270         IF( MOD( k / nproc_bgrp, ntids ) /= mytid ) CYCLE
271#endif
272         IF (ispin(i).EQ.ispin(k)) THEN
273            rsum=0.d0
274            ibgrp_k = ibgrp_g2l( k )
275            IF( ibgrp_k > 0 ) THEN
276               DO ia = 1, nat
277                  is=ityp(ia)
278                  IF( upf(is)%tvanp ) THEN
279                     DO iv=1,nh(is)
280                        inl=indv_ijkb0(ia)+iv
281                        DO jv=1,nh(is)
282                           jnl=indv_ijkb0(ia)+jv
283                           IF(ABS(qq_nt(iv,jv,is)).GT.1.e-5) THEN
284                              rsum = rsum + qq_nt(iv,jv,is)*bec_tmp(inl)*bec_bgrp(jnl,ibgrp_k)
285                           ENDIF
286                        END DO
287                     END DO
288                  END IF
289               END DO
290            END IF
291            csc2(k)=csc2(k)+rsum
292         ENDIF
293      END DO
294!$omp end parallel
295!
296!     orthogonalized cp(i) : |cp(i)>=|cp(i)>-\sum_k<i csc(k)|cp(k)>
297!
298!     corresponing bec:  bec(i)=<cp(i)|beta>-csc(k)<cp(k)|beta>
299!
300      CALL mp_sum( csc2, intra_bgrp_comm )
301      CALL mp_sum( csc2, inter_bgrp_comm )
302      csc = csc + csc2
303
304      bec_tmp = 0.0d0
305      DO k = iupdwn(iss), kmax
306         ibgrp_k = ibgrp_g2l( k )
307         IF( ibgrp_k > 0 ) THEN
308            DO inl=1,nkbx
309               bec_tmp(inl)=bec_tmp(inl)-csc(k)*bec_bgrp(inl,ibgrp_k)
310            END DO
311         END IF
312      END DO
313      nk = 0
314      DO k = iupdwn(iss), kmax
315         ibgrp_k = ibgrp_g2l( k )
316         IF( ibgrp_k > 0 ) THEN
317            nk = nk + 1
318            csc( nk ) = csc( k )
319         END IF
320      END DO
321      CALL mp_sum( bec_tmp, inter_bgrp_comm )
322      IF( ibgrp_i > 0 ) bec_bgrp(:,ibgrp_i ) = bec_bgrp(:,ibgrp_i ) + bec_tmp
323
324      DEALLOCATE( csc2 )
325      DEALLOCATE( bec_tmp )
326      DEALLOCATE( cp_tmp )
327!
328      RETURN
329      END SUBROUTINE gracsc_bgrp
330
331END SUBROUTINE gram_bgrp
332